From 00fab2596644f191b96f29a9b1f4a11f219d80cf Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Wed, 30 Jan 2019 16:30:36 -0800 Subject: [PATCH] HIVE-21189 : hive.merge.nway.joins should default to false Signed-off-by: Ashutosh Chauhan --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +- data/conf/perf-reg/spark/hive-site.xml | 6 + data/conf/spark/local/hive-site.xml | 6 + data/conf/spark/standalone/hive-site.xml | 5 + data/conf/spark/yarn-cluster/hive-site.xml | 6 + .../queries/clientpositive/map_join_on_filter.q | 4 +- .../subquery_scalar_multi_rows.q.out | 3 +- .../clientpositive/annotate_stats_join.q.out | 99 +- .../clientpositive/annotate_stats_join_pkfk.q.out | 124 +- .../test/results/clientpositive/auto_join12.q.out | 74 +- .../test/results/clientpositive/auto_join20.q.out | 296 ++++- .../test/results/clientpositive/auto_join21.q.out | 155 ++- .../test/results/clientpositive/auto_join28.q.out | 408 ++++-- .../test/results/clientpositive/auto_join29.q.out | 788 ++++++++---- .../test/results/clientpositive/auto_join3.q.out | 112 +- .../test/results/clientpositive/auto_join31.q.out | 81 +- .../test/results/clientpositive/auto_join7.q.out | 118 +- .../results/clientpositive/auto_join_stats.q.out | 70 +- .../results/clientpositive/auto_join_stats2.q.out | 50 +- .../clientpositive/correlationoptimizer5.q.out | 455 ++++--- .../results/clientpositive/cross_join_merge.q.out | 124 +- .../test/results/clientpositive/empty_join.q.out | 40 +- .../results/clientpositive/explain_logical.q.out | 66 +- .../test/results/clientpositive/fold_to_null.q.out | 70 +- .../results/clientpositive/infer_bucket_sort.q.out | 6 +- .../infer_bucket_sort_reducers_power_two.q.out | 14 +- .../results/clientpositive/infer_join_preds.q.out | 39 +- ql/src/test/results/clientpositive/join12.q.out | 30 +- ql/src/test/results/clientpositive/join20.q.out | 100 +- ql/src/test/results/clientpositive/join21.q.out | 55 +- ql/src/test/results/clientpositive/join26.q.out | 234 ++-- ql/src/test/results/clientpositive/join28.q.out | 120 +- ql/src/test/results/clientpositive/join3.q.out | 38 +- ql/src/test/results/clientpositive/join40.q.out | 100 +- ql/src/test/results/clientpositive/join45.q.out | 172 ++- ql/src/test/results/clientpositive/join47.q.out | 172 ++- ql/src/test/results/clientpositive/join7.q.out | 49 +- .../results/clientpositive/join_alt_syntax.q.out | 66 +- .../clientpositive/join_cond_pushdown_1.q.out | 66 +- .../clientpositive/join_cond_pushdown_2.q.out | 87 +- .../clientpositive/join_cond_pushdown_3.q.out | 66 +- .../clientpositive/join_cond_pushdown_4.q.out | 87 +- .../join_cond_pushdown_unqual1.q.out | 74 +- .../join_cond_pushdown_unqual2.q.out | 68 +- .../join_cond_pushdown_unqual3.q.out | 74 +- .../join_cond_pushdown_unqual4.q.out | 68 +- .../clientpositive/join_filters_overlap.q.out | 986 +++++++++++++- .../clientpositive/join_grp_diff_keys.q.out | 70 +- .../join_merge_multi_expressions.q.out | 66 +- .../results/clientpositive/join_reorder2.q.out | 68 +- .../results/clientpositive/join_reorder3.q.out | 68 +- .../results/clientpositive/join_reorder4.q.out | 123 +- .../test/results/clientpositive/keep_uniform.q.out | 82 +- .../results/clientpositive/llap/auto_join21.q.out | 92 +- .../results/clientpositive/llap/auto_join29.q.out | 655 +++++----- .../results/clientpositive/llap/auto_join30.q.out | 345 ++--- .../clientpositive/llap/auto_smb_mapjoin_14.q.out | 63 +- .../llap/auto_sortmerge_join_11.q.out | 93 +- .../llap/auto_sortmerge_join_12.q.out | 174 ++- .../llap/auto_sortmerge_join_6.q.out | 134 +- .../llap/auto_sortmerge_join_9.q.out | 126 +- .../llap/bucketizedhiveinputformat.q.out | 3 +- .../clientpositive/llap/column_access_stats.q.out | 31 +- .../llap/correlationoptimizer3.q.out | 165 ++- .../llap/correlationoptimizer4.q.out | 250 ++-- .../llap/correlationoptimizer6.q.out | 124 +- .../results/clientpositive/llap/cross_prod_3.q.out | 54 +- .../llap/dynamic_semijoin_reduction.q.out | 70 +- .../llap/dynamic_semijoin_reduction_4.q.out | 78 +- .../llap/dynamic_semijoin_reduction_sw.q.out | 152 ++- .../llap/dynamic_semijoin_user_level.q.out | 170 +-- .../results/clientpositive/llap/empty_join.q.out | 44 +- .../clientpositive/llap/explainuser_1.q.out | 1358 +++++++++++--------- .../llap/hybridgrace_hashjoin_2.q.out | 576 +++++---- .../clientpositive/llap/join32_lessSize.q.out | 40 +- .../llap/join_is_not_distinct_from.q.out | 202 +-- .../clientpositive/llap/join_nullsafe.q.out | 202 +-- .../results/clientpositive/llap/keep_uniform.q.out | 183 +-- .../results/clientpositive/llap/lineage2.q.out | 2 +- .../results/clientpositive/llap/mapjoin_hint.q.out | 75 +- .../llap/materialized_view_rewrite_2.q.out | 3 +- .../llap/materialized_view_rewrite_6.q.out | 25 +- .../llap/materialized_view_rewrite_7.q.out | 172 ++- .../llap/materialized_view_rewrite_part_2.q.out | 3 +- .../results/clientpositive/llap/mergejoin.q.out | 3 +- ql/src/test/results/clientpositive/llap/mrr.q.out | 84 +- .../results/clientpositive/llap/semijoin.q.out | 498 ++++--- .../clientpositive/llap/semijoin_hint.q.out | 78 +- .../clientpositive/llap/semijoin_reddedup.q.out | 94 +- .../results/clientpositive/llap/skewjoin.q.out | 170 ++- .../clientpositive/llap/smb_mapjoin_14.q.out | 63 +- .../clientpositive/llap/smb_mapjoin_17.q.out | 917 ++++++++++--- .../clientpositive/llap/smb_mapjoin_4.q.out | 706 ++++++---- .../clientpositive/llap/smb_mapjoin_5.q.out | 706 ++++++---- .../clientpositive/llap/smb_mapjoin_6.q.out | 65 +- .../clientpositive/llap/subquery_multi.q.out | 56 +- .../clientpositive/llap/subquery_notin.q.out | 84 +- .../clientpositive/llap/subquery_scalar.q.out | 324 +++-- .../clientpositive/llap/subquery_select.q.out | 160 ++- .../clientpositive/llap/subquery_views.q.out | 110 +- .../llap/tez_dynpart_hashjoin_2.q.out | 141 +- .../clientpositive/llap/tez_smb_empty.q.out | 140 +- .../clientpositive/llap/tez_smb_reduce_side.q.out | 80 +- .../results/clientpositive/llap/tez_union.q.out | 103 +- .../llap/tez_vector_dynpart_hashjoin_2.q.out | 141 +- .../llap/vector_auto_smb_mapjoin_14.q.out | 89 +- .../clientpositive/llap/vector_complex_all.q.out | 91 +- .../clientpositive/llap/vector_join30.q.out | 1158 ++++++++++------- .../clientpositive/llap/vector_nullsafe_join.q.out | 484 ++++--- .../vectorized_dynamic_semijoin_reduction.q.out | 78 +- ql/src/test/results/clientpositive/mapjoin47.q.out | 247 +++- .../test/results/clientpositive/mapjoin_hook.q.out | 6 +- .../results/clientpositive/mapjoin_subquery.q.out | 164 +-- .../results/clientpositive/mapjoin_subquery2.q.out | 50 +- ql/src/test/results/clientpositive/mergejoin.q.out | 3 +- .../test/results/clientpositive/mergejoins.q.out | 104 +- .../results/clientpositive/mergejoins_mixed.q.out | 301 ++++- .../clientpositive/optimize_filter_literal.q.out | 3 +- ql/src/test/results/clientpositive/ppd_join3.q.out | 64 +- .../results/clientpositive/ppd_outer_join4.q.out | 62 +- .../runtime_skewjoin_mapjoin_spark.q.out | 363 +++--- ql/src/test/results/clientpositive/skewjoin.q.out | 342 +++-- .../results/clientpositive/skewjoin_mapjoin4.q.out | 90 +- .../clientpositive/skewjoin_union_remove_2.q.out | 77 +- .../test/results/clientpositive/skewjoinopt7.q.out | 92 +- .../test/results/clientpositive/skewjoinopt8.q.out | 92 +- .../results/clientpositive/smb_mapjoin_25.q.out | 291 ++++- .../clientpositive/stat_estimate_drill.q.out | 3 +- .../clientpositive/subquery_notin_having.q.out | 51 +- .../tez/hybridgrace_hashjoin_2.q.out | 455 ++++--- .../clientpositive/vector_outer_join6.q.out | 4 +- .../clientpositive/vectorization_sum_if_when.q.out | 4 +- 132 files changed, 14375 insertions(+), 7190 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 085bed938b..93ecb8fe7c 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4443,7 +4443,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "How frequently to check for idle Spark sessions. Minimum value is 60 seconds."), NWAYJOINREORDER("hive.reorder.nway.joins", true, "Runs reordering of tables within single n-way join (i.e.: picks streamtable)"), - HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true, + HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", false, "Merge adjacent joins into a single n-way join"), HIVE_LOG_N_RECORDS("hive.log.every.n.records", 0L, new RangeValidator(0L, null), "If value is greater than 0 logs in fixed intervals of size n rather than exponentially."), diff --git a/data/conf/perf-reg/spark/hive-site.xml b/data/conf/perf-reg/spark/hive-site.xml index f6ba664d4e..40c306ca9a 100644 --- a/data/conf/perf-reg/spark/hive-site.xml +++ b/data/conf/perf-reg/spark/hive-site.xml @@ -168,6 +168,12 @@ + + hive.merge.nway.joins + true + + + hive.auto.convert.join true diff --git a/data/conf/spark/local/hive-site.xml b/data/conf/spark/local/hive-site.xml index 8976252ec8..a56ce61785 100644 --- a/data/conf/spark/local/hive-site.xml +++ b/data/conf/spark/local/hive-site.xml @@ -158,6 +158,12 @@ + + hive.merge.nway.joins + true + + + hive.auto.convert.join false diff --git a/data/conf/spark/standalone/hive-site.xml b/data/conf/spark/standalone/hive-site.xml index c239447060..27cbd07db2 100644 --- a/data/conf/spark/standalone/hive-site.xml +++ b/data/conf/spark/standalone/hive-site.xml @@ -158,6 +158,11 @@ + + hive.merge.nway.joins + true + + hive.auto.convert.join false diff --git a/data/conf/spark/yarn-cluster/hive-site.xml b/data/conf/spark/yarn-cluster/hive-site.xml index 8080cf4c4e..4ce6b10586 100644 --- a/data/conf/spark/yarn-cluster/hive-site.xml +++ b/data/conf/spark/yarn-cluster/hive-site.xml @@ -158,6 +158,12 @@ + + hive.merge.nway.joins + true + + + hive.auto.convert.join false diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/map_join_on_filter.q b/itests/hive-blobstore/src/test/queries/clientpositive/map_join_on_filter.q index 55b366d31a..94e5709bdf 100644 --- a/itests/hive-blobstore/src/test/queries/clientpositive/map_join_on_filter.q +++ b/itests/hive-blobstore/src/test/queries/clientpositive/map_join_on_filter.q @@ -1,5 +1,5 @@ -- Test MAPJOIN with filters in the ON condition - +set hive.merge.nway.joins=false; DROP TABLE src_a_data; CREATE TABLE src_a_data ( key int, @@ -13,4 +13,4 @@ SELECT /*+ MAPJOIN(src1, src2) */ * FROM src_a_data src1 RIGHT OUTER JOIN src_a_data src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src_a_data src3 ON (src2.key = src3.key AND src3.key < 10) -SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; \ No newline at end of file +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; diff --git a/ql/src/test/results/clientnegative/subquery_scalar_multi_rows.q.out b/ql/src/test/results/clientnegative/subquery_scalar_multi_rows.q.out index 0a780db7ef..92f1365e23 100644 --- a/ql/src/test/results/clientnegative/subquery_scalar_multi_rows.q.out +++ b/ql/src/test/results/clientnegative/subquery_scalar_multi_rows.q.out @@ -1,4 +1,5 @@ -Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select p_name from part where p_size > (select p_size from part) PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out index 798644f5c0..8c9109f0ca 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -433,7 +433,8 @@ POSTHOOK: Input: default@emp_n2 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -473,6 +474,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) TableScan alias: e1 filterExpr: deptid is not null (type: boolean) @@ -494,11 +521,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: int) - 1 _col0 (type: int) - 2 _col1 (type: int) + 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 768 Data size: 225024 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -529,7 +554,8 @@ POSTHOOK: Input: default@loc #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -569,6 +595,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) TableScan alias: l filterExpr: locid is not null (type: boolean) @@ -590,11 +642,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: int) - 1 _col0 (type: int) - 2 _col1 (type: int) + 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -625,7 +675,8 @@ POSTHOOK: Input: default@loc #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -664,6 +715,32 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: string) TableScan alias: l filterExpr: (locid is not null and state is not null) (type: boolean) @@ -685,11 +762,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string), _col1 (type: int) - 1 _col1 (type: string), _col0 (type: int) - 2 _col0 (type: string), _col1 (type: int) + 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index eafd49acb8..2cf5b0c456 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -808,7 +808,8 @@ POSTHOOK: Input: default@store_sales_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -846,6 +847,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) TableScan alias: s1 filterExpr: s_store_sk is not null (type: boolean) @@ -866,11 +893,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col1 Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -903,7 +928,8 @@ POSTHOOK: Input: default@store_sales_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -941,6 +967,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) TableScan alias: s1 filterExpr: (s_store_sk > 1000) (type: boolean) @@ -961,11 +1013,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -998,7 +1048,8 @@ POSTHOOK: Input: default@store_sales_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1036,6 +1087,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 964 Data size: 7576 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) TableScan alias: s1 filterExpr: s_store_sk is not null (type: boolean) @@ -1056,11 +1133,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col1 Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1093,7 +1168,8 @@ POSTHOOK: Input: default@store_sales_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1131,6 +1207,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 321 Data size: 2524 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 321 Data size: 2524 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) TableScan alias: s1 filterExpr: s_store_sk is not null (type: boolean) @@ -1151,11 +1253,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col1 Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git a/ql/src/test/results/clientpositive/auto_join12.q.out b/ql/src/test/results/clientpositive/auto_join12.q.out index 3ffd6ed49b..124bb7b66d 100644 --- a/ql/src/test/results/clientpositive/auto_join12.q.out +++ b/ql/src/test/results/clientpositive/auto_join12.q.out @@ -25,39 +25,38 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src + $hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_2:src + $hdt$_0:$hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src + $hdt$_0:$hdt$_0:src TableScan alias: src filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (UDFToDouble(key) < 80.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - $hdt$_0:$hdt$_2:src + $hdt$_0:$hdt$_1:src TableScan alias: src filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) @@ -73,45 +72,50 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: src filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (UDFToDouble(key) < 80.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0,_col3) (type: int) - outputColumnNames: _col0 + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - mode: hash + Select Operator + expressions: hash(_col0,_col3) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/auto_join20.q.out b/ql/src/test/results/clientpositive/auto_join20.q.out index 3fc97b677d..14bf54de2b 100644 --- a/ql/src/test/results/clientpositive/auto_join20.q.out +++ b/ql/src/test/results/clientpositive/auto_join20.q.out @@ -19,20 +19,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: a:src1 Fetch Operator limit: -1 - a:src2 - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: a:src1 TableScan @@ -43,15 +45,13 @@ STAGE PLANS: predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 20)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - a:src2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: TableScan alias: src2 filterExpr: (key < 10) (type: boolean) @@ -59,17 +59,45 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 20)} + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + a:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a:$INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {(key < 20)} + keys: + 0 _col0 (type: string) + 1 key (type: string) + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -77,34 +105,43 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Right Outer Join 0 to 1 filter predicates: 0 - 1 - 2 {(key < 20)} + 1 {(key < 20)} keys: - 0 key (type: string) + 0 _col0 (type: string) 1 key (type: string) - 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -119,6 +156,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 20)} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -165,20 +249,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: a:src1 Fetch Operator limit: -1 - a:src2 - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: a:src1 TableScan @@ -189,15 +275,13 @@ STAGE PLANS: predicate: ((key < 10) and (key < 15)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 20)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - a:src2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: TableScan alias: src2 filterExpr: ((key < 10) and (key < 15)) (type: boolean) @@ -205,17 +289,45 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key < 15)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 20)} + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + a:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a:$INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {(key < 20)} + keys: + 0 _col0 (type: string) + 1 key (type: string) + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -223,34 +335,43 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Right Outer Join 0 to 1 filter predicates: 0 - 1 - 2 {(key < 20)} + 1 {(key < 20)} keys: - 0 key (type: string) + 0 _col0 (type: string) 1 key (type: string) - 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -265,6 +386,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 20)} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/auto_join21.q.out b/ql/src/test/results/clientpositive/auto_join21.q.out index 243f8721c3..b68259cf58 100644 --- a/ql/src/test/results/clientpositive/auto_join21.q.out +++ b/ql/src/test/results/clientpositive/auto_join21.q.out @@ -9,34 +9,23 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - src1 - Fetch Operator - limit: -1 src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) src2 TableScan alias: src2 @@ -49,56 +38,152 @@ STAGE PLANS: filter predicates: 0 {(key < 10)} 1 - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 - Right Outer Join 1 to 2 filter predicates: 0 {(key < 10)} 1 - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/auto_join28.q.out b/ql/src/test/results/clientpositive/auto_join28.q.out index cc8a3a5675..3c62b1a92c 100644 --- a/ql/src/test/results/clientpositive/auto_join28.q.out +++ b/ql/src/test/results/clientpositive/auto_join28.q.out @@ -9,34 +9,23 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - src1 - Fetch Operator - limit: -1 src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) src2 TableScan alias: src2 @@ -49,56 +38,152 @@ STAGE PLANS: filter predicates: 0 {(key < 10)} 1 - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 - Right Outer Join 1 to 2 filter predicates: 0 {(key < 10)} 1 - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -116,12 +201,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: src2 @@ -143,30 +228,23 @@ STAGE PLANS: filter predicates: 0 {(key < 10)} 1 - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) src3 TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -175,25 +253,30 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 1 to 2 filter predicates: 0 {(key < 10)} 1 - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 137 Data size: 73158 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 73158 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 137 Data size: 73158 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -201,10 +284,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 73158 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 73158 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -227,12 +310,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -254,11 +337,9 @@ STAGE PLANS: filter predicates: 0 1 {(key > 10)} - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) src3 TableScan alias: src3 @@ -268,16 +349,11 @@ STAGE PLANS: predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -286,25 +362,30 @@ STAGE PLANS: Map Join Operator condition map: Right Outer Join 0 to 1 - Left Outer Join 1 to 2 filter predicates: 0 1 {(key > 10)} - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -312,10 +393,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -338,20 +419,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: src1 Fetch Operator limit: -1 - src2 - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: src1 TableScan @@ -365,26 +448,58 @@ STAGE PLANS: filter predicates: 0 1 {(key > 10)} - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - src2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: TableScan alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator + Map Join Operator + condition map: + Right Outer Join 0 to 1 filter predicates: 0 1 {(key > 10)} - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -393,41 +508,92 @@ STAGE PLANS: Map Join Operator condition map: Right Outer Join 0 to 1 - Right Outer Join 1 to 2 filter predicates: 0 - 1 {(key > 10)} - 2 {(key < 10)} + 1 {(key < 10)} keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/auto_join29.q.out b/ql/src/test/results/clientpositive/auto_join29.q.out index 0e0074b88c..5f10d5f9fe 100644 --- a/ql/src/test/results/clientpositive/auto_join29.q.out +++ b/ql/src/test/results/clientpositive/auto_join29.q.out @@ -9,34 +9,23 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - src1 - Fetch Operator - limit: -1 src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1 - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) src2 TableScan alias: src2 @@ -49,56 +38,152 @@ STAGE PLANS: filter predicates: 0 {(key < 10)} 1 - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 - Right Outer Join 1 to 2 filter predicates: 0 {(key < 10)} 1 - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -624,12 +709,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: src2 @@ -651,30 +736,23 @@ STAGE PLANS: filter predicates: 0 {(key < 10)} 1 - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) src3 TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -683,25 +761,30 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 1 to 2 filter predicates: 0 {(key < 10)} 1 - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 137 Data size: 73158 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 73158 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 137 Data size: 73158 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -709,10 +792,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 73158 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 73158 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1243,12 +1326,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -1270,11 +1353,9 @@ STAGE PLANS: filter predicates: 0 1 {(key > 10)} - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) src3 TableScan alias: src3 @@ -1284,16 +1365,11 @@ STAGE PLANS: predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1302,25 +1378,30 @@ STAGE PLANS: Map Join Operator condition map: Right Outer Join 0 to 1 - Left Outer Join 1 to 2 filter predicates: 0 1 {(key > 10)} - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1328,10 +1409,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1204 Data size: 554114 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1874,20 +1955,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: src1 Fetch Operator limit: -1 - src2 - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: src1 TableScan @@ -1901,26 +1984,58 @@ STAGE PLANS: filter predicates: 0 1 {(key > 10)} - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - src2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: TableScan alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator + Map Join Operator + condition map: + Right Outer Join 0 to 1 filter predicates: 0 1 {(key > 10)} - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1929,41 +2044,92 @@ STAGE PLANS: Map Join Operator condition map: Right Outer Join 0 to 1 - Right Outer Join 1 to 2 filter predicates: 0 - 1 {(key > 10)} - 2 {(key < 10)} + 1 {(key < 10)} keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -2501,12 +2667,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -2528,22 +2694,20 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) src3 TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -2556,21 +2720,27 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Left Outer Join 1 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2578,10 +2748,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 221 Data size: 102884 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2612,20 +2782,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-10 is a root stage + Stage-8 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: src1 Fetch Operator limit: -1 - src2 - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: src1 TableScan @@ -2636,15 +2808,13 @@ STAGE PLANS: predicate: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 10)} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - src2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: TableScan alias: src2 filterExpr: ((key < 10) and (key > 10)) (type: boolean) @@ -2652,17 +2822,45 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {(key < 10)} + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2670,42 +2868,93 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 1 to 2 + Right Outer Join 0 to 1 filter predicates: 0 - 1 - 2 {(key < 10)} + 1 {(key < 10)} keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -3231,12 +3480,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: src2 @@ -3249,39 +3498,32 @@ STAGE PLANS: src2 TableScan alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) + filterExpr: (key > 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key > 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator filter predicates: 0 {(key < 10)} 1 - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) src3 TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator - filter predicates: - 0 {(key < 10)} - 1 - 2 keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -3290,25 +3532,33 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Inner Join 1 to 2 filter predicates: 0 {(key < 10)} 1 - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col5 < 10) (type: boolean) + Statistics: Num rows: 254 Data size: 60876 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3316,10 +3566,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 401 Data size: 184586 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3350,18 +3600,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: src1 Fetch Operator limit: -1 - src2 + src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -3377,34 +3627,27 @@ STAGE PLANS: filter predicates: 0 1 {(key > 10)} - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - src2 + src3 TableScan - alias: src2 + alias: src3 filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator - filter predicates: - 0 - 1 {(key > 10)} - 2 keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: src3 + alias: src2 filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -3413,25 +3656,30 @@ STAGE PLANS: Map Join Operator condition map: Right Outer Join 0 to 1 - Inner Join 1 to 2 filter predicates: 0 1 {(key > 10)} - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 414 Data size: 221076 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work @@ -3495,18 +3743,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: src1 Fetch Operator limit: -1 - src2 + src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -3522,27 +3770,25 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - src2 + src3 TableScan - alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) + alias: src3 + filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + alias: src2 + filterExpr: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 10) and (key > 10)) (type: boolean) @@ -3550,21 +3796,27 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 135 Data size: 72090 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/auto_join3.q.out b/ql/src/test/results/clientpositive/auto_join3.q.out index c415b3d55b..8b137eb346 100644 --- a/ql/src/test/results/clientpositive/auto_join3.q.out +++ b/ql/src/test/results/clientpositive/auto_join3.q.out @@ -19,20 +19,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n140 STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-0, Stage-3 - Stage-3 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 Fetch Operator limit: -1 - $hdt$_1:src2 + $hdt$_2:src3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -52,76 +52,80 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - $hdt$_1:src2 + $hdt$_2:src3 TableScan - alias: src2 + alias: src3 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan - alias: src3 + alias: src2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n140 + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: key, value + expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n140 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1251 Data size: 118845 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -135,7 +139,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n140 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -143,7 +147,7 @@ STAGE PLANS: Column Types: int, string Table: default.dest1_n140 - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/auto_join31.q.out b/ql/src/test/results/clientpositive/auto_join31.q.out index dd2e515fab..68fd705851 100644 --- a/ql/src/test/results/clientpositive/auto_join31.q.out +++ b/ql/src/test/results/clientpositive/auto_join31.q.out @@ -25,18 +25,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: x:src Fetch Operator limit: -1 - y:src + z:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -52,50 +52,61 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - y:src + z:src TableScan alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/auto_join7.q.out b/ql/src/test/results/clientpositive/auto_join7.q.out index 6efe18b478..f325e2044e 100644 --- a/ql/src/test/results/clientpositive/auto_join7.q.out +++ b/ql/src/test/results/clientpositive/auto_join7.q.out @@ -52,9 +52,11 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n147 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0, Stage-3 - Stage-3 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-1 @@ -94,6 +96,30 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 39160 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:src3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_2:src3 TableScan alias: src3 filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) @@ -105,50 +131,52 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Full Outer Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col4) (type: int), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 15675 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 55 Data size: 15675 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n147 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) - outputColumnNames: c1, c2, c3, c4, c5, c6 - Statistics: Num rows: 55 Data size: 15675 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') - mode: hash + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 110 Data size: 58740 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col4) (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n147 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work Stage: Stage-0 Move Operator @@ -160,7 +188,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n147 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -168,7 +196,7 @@ STAGE PLANS: Column Types: int, string, int, string, int, string Table: default.dest1_n147 - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out b/ql/src/test/results/clientpositive/auto_join_stats.q.out index f93749be9e..dc1338a5a7 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -318,21 +318,21 @@ POSTHOOK: Input: default@smalltable_n0 POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-9 is a root stage , consists of Stage-11, Stage-12, Stage-1 - Stage-11 has a backup stage: Stage-1 - Stage-7 depends on stages: Stage-11 - Stage-10 depends on stages: Stage-1, Stage-7, Stage-8 - Stage-6 depends on stages: Stage-10 - Stage-12 has a backup stage: Stage-1 - Stage-8 depends on stages: Stage-12 + Stage-11 is a root stage , consists of Stage-13, Stage-14, Stage-1 + Stage-13 has a backup stage: Stage-1 + Stage-9 depends on stages: Stage-13 + Stage-12 depends on stages: Stage-1, Stage-9, Stage-10 + Stage-7 depends on stages: Stage-12 + Stage-14 has a backup stage: Stage-1 + Stage-10 depends on stages: Stage-14 Stage-1 - Stage-0 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-11 Conditional Operator - Stage: Stage-11 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_2:src2 @@ -356,7 +356,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-7 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -388,7 +388,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-10 + Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:smalltable_n0 @@ -413,8 +413,7 @@ STAGE PLANS: HashTable Sink Operator keys: 0 (_col1 + _col3) (type: double) - 1 _col0 (type: double) - 2 _col1 (type: double) + 1 _col1 (type: double) $hdt$_3:smalltable2_n0 TableScan alias: smalltable2_n0 @@ -431,38 +430,43 @@ STAGE PLANS: keys: 0 (_col1 + _col3) (type: double) 1 _col0 (type: double) - 2 _col1 (type: double) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 (_col1 + _col3) (type: double) 1 _col0 (type: double) - 2 _col1 (type: double) - outputColumnNames: _col0, _col2, _col5 - Statistics: Num rows: 1740 Data size: 330638 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1740 Data size: 330638 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1740 Data size: 330638 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + _col3) (type: double) + 1 _col1 (type: double) + outputColumnNames: _col0, _col2, _col5 + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-12 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src1 @@ -486,7 +490,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-8 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/auto_join_stats2.q.out b/ql/src/test/results/clientpositive/auto_join_stats2.q.out index de7aa8cb8e..69623472fc 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats2.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats2.q.out @@ -178,12 +178,12 @@ POSTHOOK: Input: default@smalltable2 POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-10 is a root stage + Stage-7 depends on stages: Stage-10 + Stage-0 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:smalltable @@ -211,8 +211,7 @@ STAGE PLANS: HashTable Sink Operator keys: 0 (_col1 + _col3) (type: double) - 1 _col0 (type: double) - 2 _col1 (type: double) + 1 _col1 (type: double) $hdt$_1:src1 TableScan alias: src1 @@ -245,9 +244,8 @@ STAGE PLANS: keys: 0 (_col1 + _col3) (type: double) 1 _col0 (type: double) - 2 _col1 (type: double) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -272,24 +270,30 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 (_col1 + _col3) (type: double) 1 _col0 (type: double) - 2 _col1 (type: double) - outputColumnNames: _col0, _col2, _col5 - Statistics: Num rows: 1740 Data size: 330638 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1740 Data size: 330638 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1740 Data size: 330638 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 870 Data size: 165319 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 (_col1 + _col3) (type: double) + 1 _col1 (type: double) + outputColumnNames: _col0, _col2, _col5 + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out index c473b50f1b..a0078461be 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -115,34 +115,34 @@ POSTHOOK: Input: default@t3_n5 POSTHOOK: Input: default@t4_n1 POSTHOOK: Output: default@dest_co1 STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-5 is a root stage STAGE PLANS: - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: n + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: m + alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -164,40 +164,89 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col1 (type: string) + expressions: _col0 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co1 - Stage: Stage-1 + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co1 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: x + alias: n filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: y + alias: m filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -212,50 +261,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col2 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co1 - - Stage: Stage-2 - Stats Work - Basic Stats Work: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_co1 SELECT b.key, d.val @@ -390,30 +414,36 @@ STAGE PLANS: Reduce Operator Tree: Demux Operator Statistics: Num rows: 4 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 12 Data size: 620 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 13 Data size: 682 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 682 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 682 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 8 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 Join Operator condition map: Inner Join 0 to 1 @@ -427,24 +457,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 220 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 12 Data size: 620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 440 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col3 - Statistics: Num rows: 13 Data size: 682 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col3 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 682 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 682 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -522,28 +550,30 @@ POSTHOOK: Input: default@t3_n5 POSTHOOK: Input: default@t4_n1 POSTHOOK: Output: default@dest_co3 STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-0 + Stage-14 is a root stage + Stage-10 depends on stages: Stage-14 + Stage-9 depends on stages: Stage-10, Stage-11 , consists of Stage-12, Stage-13, Stage-2 + Stage-12 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-12 + Stage-0 depends on stages: Stage-2, Stage-7, Stage-8 + Stage-3 depends on stages: Stage-0 + Stage-13 has a backup stage: Stage-2 + Stage-8 depends on stages: Stage-13 + Stage-2 + Stage-15 is a root stage + Stage-11 depends on stages: Stage-15 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:x - Fetch Operator - limit: -1 $hdt$_1:y Fetch Operator limit: -1 - $hdt$_2:$hdt$_3:m - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:x + $hdt$_1:y TableScan - alias: x + alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -557,10 +587,12 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - $hdt$_1:y + + Stage: Stage-10 + Map Reduce + Map Operator Tree: TableScan - alias: y + alias: x filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -570,11 +602,170 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-9 + Conditional Operator + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co3 + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co3 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + + Stage: Stage-13 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co3 + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co3 + + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:$hdt$_3:m + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: $hdt$_2:$hdt$_3:m TableScan alias: m @@ -592,7 +783,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-6 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -618,46 +809,16 @@ STAGE PLANS: expressions: _col2 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co3 - - Stage: Stage-2 - Stats Work - Basic Stats Work: - PREHOOK: query: INSERT OVERWRITE TABLE dest_co3 SELECT b.key, d.val FROM diff --git a/ql/src/test/results/clientpositive/cross_join_merge.q.out b/ql/src/test/results/clientpositive/cross_join_merge.q.out index 300f50e628..b01ae26387 100644 --- a/ql/src/test/results/clientpositive/cross_join_merge.q.out +++ b/ql/src/test/results/clientpositive/cross_join_merge.q.out @@ -1,4 +1,5 @@ -Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select src1.key from src src1 join src src2 join src src3 PREHOOK: type: QUERY @@ -11,7 +12,8 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -36,6 +38,30 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250000 Data size: 21750000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -48,11 +74,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 outputColumnNames: _col0 Statistics: Num rows: 125000000 Data size: 10875000000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -81,7 +105,8 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -119,6 +144,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src3 filterExpr: key is not null (type: boolean) @@ -139,11 +189,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -172,7 +220,8 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -210,6 +259,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src3 filterExpr: key is not null (type: boolean) @@ -230,11 +304,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -469,7 +541,8 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -507,6 +580,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src3 filterExpr: key is not null (type: boolean) @@ -526,12 +624,10 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - Inner Join 0 to 2 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1251 Data size: 108837 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/empty_join.q.out b/ql/src/test/results/clientpositive/empty_join.q.out index f17be28b66..52f3c18972 100644 --- a/ql/src/test/results/clientpositive/empty_join.q.out +++ b/ql/src/test/results/clientpositive/empty_join.q.out @@ -61,12 +61,12 @@ POSTHOOK: Input: default@test_2_n2 POSTHOOK: Input: default@test_3_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:t2 @@ -92,7 +92,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) $hdt$_2:t3 TableScan alias: t3 @@ -109,9 +108,8 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -128,20 +126,26 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/explain_logical.q.out b/ql/src/test/results/clientpositive/explain_logical.q.out index 2f6f89149c..1ff9a25964 100644 --- a/ql/src/test/results/clientpositive/explain_logical.q.out +++ b/ql/src/test/results/clientpositive/explain_logical.q.out @@ -440,7 +440,7 @@ $hdt$_0:srcpart properties: insideView TRUE Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator (FIL_15) + Filter Operator (FIL_17) predicate: key is not null (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_2) @@ -453,27 +453,39 @@ $hdt$_0:srcpart Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Join Operator (JOIN_12) + Join Operator (JOIN_11) condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 5007 Data size: 1346883 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator (SEL_13) - expressions: _col2 (type: string), _col1 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5007 Data size: 1346883 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator (FS_14) - compressed: false - Statistics: Num rows: 5007 Data size: 1346883 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3164 Data size: 838460 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator (RS_12) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3164 Data size: 838460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) + Join Operator (JOIN_14) + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 5006 Data size: 1346614 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator (SEL_15) + expressions: _col2 (type: string), _col1 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5006 Data size: 1346614 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator (FS_16) + compressed: false + Statistics: Num rows: 5006 Data size: 1346614 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe $hdt$_1:src TableScan (TS_3) alias: src @@ -481,7 +493,7 @@ $hdt$_1:src properties: insideView TRUE Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator (FIL_16) + Filter Operator (FIL_18) predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_5) @@ -493,16 +505,14 @@ $hdt$_1:src sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Join Operator (JOIN_12) + Join Operator (JOIN_11) condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 5007 Data size: 1346883 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3164 Data size: 838460 Basic stats: COMPLETE Column stats: COMPLETE $hdt$_2:src3 TableScan (TS_6) alias: src3 @@ -510,29 +520,27 @@ $hdt$_2:src3 properties: insideView TRUE Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator (FIL_17) + Filter Operator (FIL_19) predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator (SEL_8) expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator (RS_11) + Reduce Output Operator (RS_13) key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Join Operator (JOIN_12) + Join Operator (JOIN_14) condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 5007 Data size: 1346883 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5006 Data size: 1346614 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: CREATE VIEW V5_n0 as SELECT * FROM srcpart where ds = '10' PREHOOK: type: CREATEVIEW diff --git a/ql/src/test/results/clientpositive/fold_to_null.q.out b/ql/src/test/results/clientpositive/fold_to_null.q.out index daf4267a53..84c379e42e 100644 --- a/ql/src/test/results/clientpositive/fold_to_null.q.out +++ b/ql/src/test/results/clientpositive/fold_to_null.q.out @@ -31,7 +31,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@t POSTHOOK: Lineage: t.a SCRIPT [] -Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select a from t,t2,t3 where (a>3 and null between 0 and 10) is null PREHOOK: type: QUERY @@ -48,7 +49,8 @@ POSTHOOK: Input: default@t3 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +79,30 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 5 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) TableScan alias: t2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE @@ -89,16 +115,14 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 6 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 5 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 6 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -128,7 +152,8 @@ POSTHOOK: Input: default@t3 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -166,6 +191,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan alias: t3 filterExpr: (((c > 5) or null) and ((c * c) < 101) and c is not null) (type: boolean) @@ -186,16 +236,14 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out index cd1489d86f..bfdc84e24e 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -295,9 +295,9 @@ SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No -Num Buckets: 1 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: INSERT OVERWRITE TABLE test_table_n5 PARTITION (part = '1') diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out index f10fdc980c..53bc817910 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out @@ -186,10 +186,10 @@ Table: test_table_n14 #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 - numRows 2654 - rawDataSize 28466 - totalSize 31120 + numFiles 13 + numRows 259 + rawDataSize 2783 + totalSize 3042 #### A masked pattern was here #### # Storage Information @@ -197,9 +197,9 @@ SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No -Num Buckets: 4 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: INSERT OVERWRITE TABLE test_table_n14 PARTITION (part = '1') diff --git a/ql/src/test/results/clientpositive/infer_join_preds.q.out b/ql/src/test/results/clientpositive/infer_join_preds.q.out index d73005c82d..8b0e4eba0a 100644 --- a/ql/src/test/results/clientpositive/infer_join_preds.q.out +++ b/ql/src/test/results/clientpositive/infer_join_preds.q.out @@ -1164,7 +1164,8 @@ POSTHOOK: Input: default@table2_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1203,6 +1204,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 734 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 734 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col2 (type: string) TableScan alias: d filterExpr: prid is not null (type: boolean) @@ -1224,23 +1251,21 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) - 2 _col0 (type: bigint) outputColumnNames: _col0, _col2, _col5 - Statistics: Num rows: 2 Data size: 1469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 807 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col2 <> _col5) (type: boolean) - Statistics: Num rows: 2 Data size: 1469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 807 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 1469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join12.q.out b/ql/src/test/results/clientpositive/join12.q.out index 80ae337317..1b4a27dbdf 100644 --- a/ql/src/test/results/clientpositive/join12.q.out +++ b/ql/src/test/results/clientpositive/join12.q.out @@ -26,7 +26,8 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -64,6 +65,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) @@ -85,11 +111,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col3 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git a/ql/src/test/results/clientpositive/join20.q.out b/ql/src/test/results/clientpositive/join20.q.out index a6fb023622..8b17f8a842 100644 --- a/ql/src/test/results/clientpositive/join20.q.out +++ b/ql/src/test/results/clientpositive/join20.q.out @@ -13,7 +13,8 @@ POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -45,6 +46,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -57,22 +84,19 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Right Outer Join 0 to 1 filter predicates: 0 - 1 - 2 {(KEY.reducesinkkey0 < 20)} + 1 {(KEY.reducesinkkey0 < 20)} keys: - 0 key (type: string) + 0 _col0 (type: string) 1 key (type: string) - 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -80,23 +104,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -681,7 +705,8 @@ POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -713,6 +738,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -725,22 +776,19 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Right Outer Join 0 to 1 filter predicates: 0 - 1 - 2 {(KEY.reducesinkkey0 < 20)} + 1 {(KEY.reducesinkkey0 < 20)} keys: - 0 key (type: string) + 0 _col0 (type: string) 1 key (type: string) - 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -748,23 +796,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join21.q.out b/ql/src/test/results/clientpositive/join21.q.out index f059dfc9a4..228f07ca29 100644 --- a/ql/src/test/results/clientpositive/join21.q.out +++ b/ql/src/test/results/clientpositive/join21.q.out @@ -11,7 +11,8 @@ POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -39,6 +40,35 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -51,22 +81,19 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - Right Outer Join 1 to 2 + Right Outer Join 0 to 1 filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 {(KEY.reducesinkkey0 < 10)} + 0 + 1 {(KEY.reducesinkkey0 < 10)} keys: - 0 key (type: string) + 0 _col5 (type: string) 1 key (type: string) - 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -74,23 +101,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join26.q.out b/ql/src/test/results/clientpositive/join26.q.out index 3bb5295e02..0ff3c5c8b3 100644 --- a/ql/src/test/results/clientpositive/join26.q.out +++ b/ql/src/test/results/clientpositive/join26.q.out @@ -39,27 +39,76 @@ INNER JOIN (SELECT `key` FROM `default`.`src1` WHERE `key` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-0, Stage-3 - Stage-3 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:y + $hdt$_0:z Fetch Operator limit: -1 + Partition Description: + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart $hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:y + $hdt$_0:z TableScan - alias: y - filterExpr: key is not null (type: boolean) + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator @@ -72,10 +121,9 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Position of Big Table: 2 + Position of Big Table: 0 $hdt$_2:x TableScan alias: x @@ -94,15 +142,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Position of Big Table: 2 + Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator @@ -116,78 +163,85 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 - Position of Big Table: 2 - Statistics: Num rows: 62 Data size: 16616 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 62 Data size: 16616 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 62 Data size: 16616 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1_n10 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1_n10 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n10 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + outputColumnNames: _col1, _col2 + Position of Big Table: 0 + Statistics: Num rows: 39 Data size: 6903 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col4 + Position of Big Table: 0 + Statistics: Num rows: 61 Data size: 16348 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 62 Data size: 16616 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1,_col2 - columns.types struct,struct,struct - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 16348 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 61 Data size: 16348 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1_n10 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct dest_j1_n10 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n10 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 61 Data size: 16348 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -339,7 +393,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] + /src [$hdt$_1:y] Stage: Stage-0 Move Operator @@ -370,7 +424,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1_n10 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: #### A masked pattern was here #### @@ -380,7 +434,7 @@ STAGE PLANS: Table: default.dest_j1_n10 Is Table Level Stats: true - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -398,7 +452,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10002 + base file name: -mr-10003 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: diff --git a/ql/src/test/results/clientpositive/join28.q.out b/ql/src/test/results/clientpositive/join28.q.out index 47d71c1fa3..e57863efbb 100644 --- a/ql/src/test/results/clientpositive/join28.q.out +++ b/ql/src/test/results/clientpositive/join28.q.out @@ -33,40 +33,39 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n11 STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-0, Stage-3 - Stage-3 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:y + $hdt$_0:z Fetch Operator limit: -1 $hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:y + $hdt$_0:z TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) $hdt$_2:x TableScan alias: x @@ -83,59 +82,64 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 62 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 62 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 62 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n11 + outputColumnNames: _col1 + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: key, value - Statistics: Num rows: 62 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -149,7 +153,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1_n11 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -157,7 +161,7 @@ STAGE PLANS: Column Types: string, string Table: default.dest_j1_n11 - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/join3.q.out b/ql/src/test/results/clientpositive/join3.q.out index 34eb33e138..06e77ca437 100644 --- a/ql/src/test/results/clientpositive/join3.q.out +++ b/ql/src/test/results/clientpositive/join3.q.out @@ -20,9 +20,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n46 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0, Stage-3 - Stage-3 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -60,6 +61,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src3 filterExpr: key is not null (type: boolean) @@ -81,11 +107,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col3 Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -126,7 +150,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n46 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -134,7 +158,7 @@ STAGE PLANS: Column Types: int, string Table: default.dest1_n46 - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/join40.q.out b/ql/src/test/results/clientpositive/join40.q.out index 60081f38e4..1b36c1f3f8 100644 --- a/ql/src/test/results/clientpositive/join40.q.out +++ b/ql/src/test/results/clientpositive/join40.q.out @@ -1776,7 +1776,8 @@ POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1808,6 +1809,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -1820,22 +1847,19 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Right Outer Join 0 to 1 filter predicates: 0 - 1 - 2 {(KEY.reducesinkkey0 < 20)} + 1 {(KEY.reducesinkkey0 < 20)} keys: - 0 key (type: string) + 0 _col0 (type: string) 1 key (type: string) - 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1843,23 +1867,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 310432 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2444,7 +2468,8 @@ POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2476,6 +2501,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 86 Data size: 30616 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -2488,22 +2539,19 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Right Outer Join 0 to 1 filter predicates: 0 - 1 - 2 {(KEY.reducesinkkey0 < 20)} + 1 {(KEY.reducesinkkey0 < 20)} keys: - 0 key (type: string) + 0 _col0 (type: string) 1 key (type: string) - 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -2511,23 +2559,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 136 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 636 Data size: 161980 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join45.q.out b/ql/src/test/results/clientpositive/join45.q.out index 8a6fd8fcec..482134062e 100644 --- a/ql/src/test/results/clientpositive/join45.q.out +++ b/ql/src/test/results/clientpositive/join45.q.out @@ -1674,7 +1674,9 @@ POSTHOOK: Input: default@src1 35 val_35 NULL NULL 66 val_66 35 val_35 NULL NULL 66 val_66 35 val_35 NULL NULL 66 val_66 -Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product +Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM cbo_t1 t1 @@ -1700,49 +1702,47 @@ POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-6 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: t1 + alias: t5 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string), (c_int = 42) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 20 Data size: 7218 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + sort order: + Statistics: Num rows: 20 Data size: 7218 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: boolean) TableScan - alias: t2 + alias: t3 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + sort order: Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 100 Data size: 72921 Basic stats: COMPLETE Column stats: COMPLETE + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 400 Data size: 294036 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1750,31 +1750,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: t5 - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string), (c_int = 42) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 20 Data size: 7218 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 20 Data size: 7218 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: boolean) - TableScan - alias: t3 - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 400 Data size: 294036 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: boolean), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: float), _col11 (type: boolean), _col12 (type: string) TableScan alias: t4 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE @@ -1786,6 +1769,30 @@ STAGE PLANS: sort order: Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 8000 Data size: 8815454 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 8000 Data size: 8815454 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: boolean), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: float), _col11 (type: boolean), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: int), _col16 (type: float), _col17 (type: boolean), _col18 (type: string) TableScan Reduce Output Operator sort order: @@ -1795,13 +1802,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 keys: 0 1 - 2 - 3 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 Statistics: Num rows: 800000 Data size: 1467199175 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1822,13 +1825,60 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + TableScan + alias: t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 100 Data size: 72921 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: ListSink -Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product +Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM cbo_t1 t1 RIGHT OUTER JOIN cbo_t1 t2 ON (t2.key = t1.key) @@ -1851,13 +1901,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 diff --git a/ql/src/test/results/clientpositive/join47.q.out b/ql/src/test/results/clientpositive/join47.q.out index 475079dacf..8102c54132 100644 --- a/ql/src/test/results/clientpositive/join47.q.out +++ b/ql/src/test/results/clientpositive/join47.q.out @@ -1654,7 +1654,9 @@ POSTHOOK: Input: default@src1 35 val_35 NULL NULL 66 val_66 35 val_35 NULL NULL 66 val_66 35 val_35 NULL NULL 66 val_66 -Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product +Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM cbo_t1 t1 @@ -1680,49 +1682,47 @@ POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-6 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: t1 + alias: t5 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string), (c_int = 42) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 20 Data size: 7218 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + sort order: + Statistics: Num rows: 20 Data size: 7218 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: boolean) TableScan - alias: t2 + alias: t3 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + sort order: Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 100 Data size: 72921 Basic stats: COMPLETE Column stats: COMPLETE + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 400 Data size: 294036 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1730,31 +1730,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: t5 - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string), (c_int = 42) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 20 Data size: 7218 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 20 Data size: 7218 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: boolean) - TableScan - alias: t3 - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 400 Data size: 294036 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: boolean), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: float), _col11 (type: boolean), _col12 (type: string) TableScan alias: t4 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE @@ -1766,6 +1749,30 @@ STAGE PLANS: sort order: Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 8000 Data size: 8815454 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 8000 Data size: 8815454 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: boolean), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: float), _col11 (type: boolean), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: int), _col16 (type: float), _col17 (type: boolean), _col18 (type: string) TableScan Reduce Output Operator sort order: @@ -1775,13 +1782,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 keys: 0 1 - 2 - 3 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 residual filter predicates: {((_col0 = _col25) or ((_col29 = _col17) and _col6))} {((_col13 = _col25) or ((_col28 = _col16) and (_col14 = _col26)))} {((_col7 = _col25) or ((_col8 = _col26) and (_col27 = _col21)))} Statistics: Num rows: 337500 Data size: 618975000 Basic stats: COMPLETE Column stats: COMPLETE @@ -1800,13 +1803,60 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + TableScan + alias: t2 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 100 Data size: 72921 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: ListSink -Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product +Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM cbo_t1 t1 RIGHT OUTER JOIN cbo_t1 t2 ON (t2.key = t1.key) @@ -1829,13 +1879,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 - 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 diff --git a/ql/src/test/results/clientpositive/join7.q.out b/ql/src/test/results/clientpositive/join7.q.out index 6d2e80f815..29b1025892 100644 --- a/ql/src/test/results/clientpositive/join7.q.out +++ b/ql/src/test/results/clientpositive/join7.q.out @@ -52,9 +52,10 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1_n17 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0, Stage-3 - Stage-3 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -94,6 +95,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 39160 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 110 Data size: 39160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 filterExpr: ((UDFToDouble(key) > 20.0D) and (UDFToDouble(key) < 25.0D)) (type: boolean) @@ -114,21 +141,19 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Full Outer Join 0 to 1 - Left Outer Join 0 to 2 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 58740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string), UDFToInteger(_col4) (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 15675 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 55 Data size: 15675 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -137,7 +162,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) outputColumnNames: c1, c2, c3, c4, c5, c6 - Statistics: Num rows: 55 Data size: 15675 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 31350 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') mode: hash @@ -160,7 +185,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1_n17 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -168,7 +193,7 @@ STAGE PLANS: Column Types: int, string, int, string, int, string Table: default.dest1_n17 - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/join_alt_syntax.q.out index aaf972bb4b..f413e7c39a 100644 --- a/ql/src/test/results/clientpositive/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/join_alt_syntax.q.out @@ -76,7 +76,8 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -114,6 +115,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 6534 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 27 Data size: 6534 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) TableScan alias: p3 filterExpr: p_name is not null (type: boolean) @@ -134,11 +161,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 10164 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -169,7 +194,8 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -207,6 +233,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 6534 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 27 Data size: 6534 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) TableScan alias: p3 filterExpr: p_name is not null (type: boolean) @@ -227,11 +279,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 10164 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out index 41cc56d171..5fea4c2470 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out @@ -10,7 +10,8 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -50,6 +51,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p3 filterExpr: p_name is not null (type: boolean) @@ -71,11 +98,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: - 0 _col1 (type: string) + 0 _col10 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 28 Data size: 51996 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -104,7 +129,8 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -144,6 +170,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p3 filterExpr: p_name is not null (type: boolean) @@ -165,11 +217,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: - 0 _col1 (type: string) + 0 _col10 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 28 Data size: 51996 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out index bf1c042bc2..f1e07d10d5 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out @@ -9,16 +9,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p1 + alias: p3 filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -35,7 +36,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p4 + alias: p2 filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -67,11 +68,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: p3 + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col10 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -88,7 +127,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p2 + alias: p4 filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -104,35 +143,21 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col1 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out index aa0dc49d25..3364572100 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out @@ -12,7 +12,8 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -52,6 +53,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p3 filterExpr: p_name is not null (type: boolean) @@ -73,11 +100,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: - 0 _col1 (type: string) + 0 _col10 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 28 Data size: 51996 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -108,7 +133,8 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -148,6 +174,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p3 filterExpr: p_name is not null (type: boolean) @@ -169,11 +221,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: - 0 _col1 (type: string) + 0 _col10 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 28 Data size: 51996 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out index c9a3b21ee4..7ec61126c8 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out @@ -11,16 +11,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p1 + alias: p3 filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -37,7 +38,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p4 + alias: p2 filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -69,11 +70,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: p3 + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col10 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: p1 filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -90,7 +129,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan - alias: p2 + alias: p4 filterExpr: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -106,35 +145,21 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col1 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 71804 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out index f02dcb174d..8a76f40505 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out @@ -70,7 +70,8 @@ POSTHOOK: Input: default@part3_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -110,6 +111,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -131,20 +158,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -172,7 +197,8 @@ POSTHOOK: Input: default@part3_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -212,6 +238,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -233,20 +285,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out index b71a058edf..a44725d686 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out @@ -70,7 +70,9 @@ POSTHOOK: Input: default@part3 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -110,6 +112,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -127,6 +155,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col19 (type: string) + sort order: + + Map-reduce partition columns: _col19 (type: string) + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_name is not null (type: boolean) @@ -148,22 +202,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 2 to 3 keys: - 0 _col1 (type: string) + 0 _col19 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) - 3 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 85 Data size: 53110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 85 Data size: 53110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 85 Data size: 53110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out index 2c8c1906ce..82ab905165 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out @@ -72,7 +72,8 @@ POSTHOOK: Input: default@part3_n2 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -112,6 +113,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -133,20 +160,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -176,7 +201,8 @@ POSTHOOK: Input: default@part3_n2 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -216,6 +242,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -237,20 +289,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out index 83ba3188f0..71cf5d7d07 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out @@ -72,7 +72,9 @@ POSTHOOK: Input: default@part3_n1 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -112,6 +114,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -129,6 +157,32 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col19 (type: string) + sort order: + + Map-reduce partition columns: _col19 (type: string) + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_name is not null (type: boolean) @@ -150,22 +204,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 2 to 3 keys: - 0 _col1 (type: string) + 0 _col19 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) - 3 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 85 Data size: 53110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 85 Data size: 53110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 85 Data size: 53110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/join_filters_overlap.q.out index 4960aad249..6cd17d1ddb 100644 --- a/ql/src/test/results/clientpositive/join_filters_overlap.q.out +++ b/ql/src/test/results/clientpositive/join_filters_overlap.q.out @@ -20,7 +20,8 @@ POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,107 @@ STAGE PLANS: tag: 1 value expressions: _col1 (type: int) auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a_n4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a_n4 + name: default.a_n4 + Truncated Path -> Alias: + /a_n4 [$hdt$_0:a_n4, $hdt$_1:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + filter mappings: + 0 [1, 1] + filter predicates: + 0 {VALUE._col2} + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col4,_col5 + columns.types int,int,boolean,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col1 (type: int), _col2 (type: boolean), _col4 (type: int), _col5 (type: int) + auto parallelism: false TableScan alias: c filterExpr: (value = 60) (type: boolean) @@ -84,12 +186,34 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: 2 + tag: 1 value expressions: _col1 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col4,_col5 + columns.types int,int,boolean,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col4,_col5 + columns.types int,int,boolean,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition base file name: a_n4 @@ -139,23 +263,21 @@ STAGE PLANS: name: default.a_n4 name: default.a_n4 Truncated Path -> Alias: - /a_n4 [$hdt$_0:a_n4, $hdt$_1:b, $hdt$_2:c] + /a_n4 [$hdt$_2:c] +#### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 filter mappings: - 0 [1, 1, 2, 1] + 0 [1, 1] filter predicates: - 0 {VALUE._col2} {VALUE._col1} + 0 {VALUE._col1} 1 - 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7 Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -223,7 +345,8 @@ POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -268,6 +391,107 @@ STAGE PLANS: tag: 1 value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean) auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a_n4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a_n4 + name: default.a_n4 + Truncated Path -> Alias: + /a_n4 [$hdt$_0:a_n4, $hdt$_1:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter mappings: + 1 [0, 1] + filter predicates: + 0 + 1 {VALUE._col2} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types int,int,int,int,boolean + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: boolean) + auto parallelism: false TableScan alias: c filterExpr: (value = 60) (type: boolean) @@ -287,12 +511,34 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: 2 + tag: 1 value expressions: _col1 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types int,int,int,int,boolean + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types int,int,int,int,boolean + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition base file name: a_n4 @@ -342,23 +588,21 @@ STAGE PLANS: name: default.a_n4 name: default.a_n4 Truncated Path -> Alias: - /a_n4 [$hdt$_0:a_n4, $hdt$_1:b, $hdt$_2:c] + /a_n4 [$hdt$_2:c] +#### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 - Left Outer Join 1 to 2 + Left Outer Join 0 to 1 filter mappings: - 1 [0, 1, 2, 1] + 0 [1, 1] filter predicates: - 0 - 1 {VALUE._col2} {VALUE._col1} - 2 + 0 {VALUE._col3} + 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col6, _col7 Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -426,7 +670,8 @@ POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -471,6 +716,107 @@ STAGE PLANS: tag: 1 value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean) auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a_n4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a_n4 + name: default.a_n4 + Truncated Path -> Alias: + /a_n4 [$hdt$_0:a_n4, $hdt$_1:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + filter mappings: + 1 [0, 1] + filter predicates: + 0 + 1 {VALUE._col2} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types int,int,int,int,boolean + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: boolean) + auto parallelism: false TableScan alias: c filterExpr: (value = 60) (type: boolean) @@ -490,12 +836,34 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: 2 + tag: 1 value expressions: _col1 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types int,int,int,int,boolean + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types int,int,int,int,boolean + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition base file name: a_n4 @@ -545,23 +913,21 @@ STAGE PLANS: name: default.a_n4 name: default.a_n4 Truncated Path -> Alias: - /a_n4 [$hdt$_0:a_n4, $hdt$_1:b, $hdt$_2:c] + /a_n4 [$hdt$_2:c] +#### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 - Left Outer Join 1 to 2 + Left Outer Join 0 to 1 filter mappings: - 1 [0, 1, 2, 1] + 0 [1, 1] filter predicates: - 0 - 1 {VALUE._col2} {VALUE._col1} - 2 + 0 {VALUE._col3} + 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col6, _col7 Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -629,7 +995,9 @@ POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -669,15 +1037,117 @@ STAGE PLANS: tag: 1 value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean) auto parallelism: false - TableScan - alias: c - filterExpr: (value = 60) (type: boolean) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 60) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a_n4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a_n4 + name: default.a_n4 + Truncated Path -> Alias: + /a_n4 [$hdt$_0:a_n4, $hdt$_1:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter mappings: + 0 [1, 1] + 1 [0, 1] + filter predicates: + 0 {VALUE._col2} + 1 {VALUE._col2} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col4,_col5,_col6 + columns.types int,int,boolean,int,int,boolean + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col4 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: boolean), _col5 (type: int), _col6 (type: boolean) + auto parallelism: false + TableScan + alias: c + filterExpr: (value = 60) (type: boolean) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), 60 (type: int) outputColumnNames: _col0, _col1 @@ -688,9 +1158,133 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: 2 + tag: 1 value expressions: _col1 (type: int) auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col4,_col5,_col6 + columns.types int,int,boolean,int,int,boolean + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col4,_col5,_col6 + columns.types int,int,boolean,int,int,boolean + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: a_n4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a_n4 + name: default.a_n4 + Truncated Path -> Alias: + /a_n4 [$hdt$_2:c] +#### A masked pattern was here #### + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + filter mappings: + 0 [1, 1] + filter predicates: + 0 {VALUE._col5} + 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8, _col9 + Statistics: Num rows: 9 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col4,_col5,_col8,_col9 + columns.types int,int,boolean,int,int,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col1 (type: int), _col2 (type: boolean), _col4 (type: int), _col5 (type: int), _col8 (type: int), _col9 (type: int) + auto parallelism: false TableScan alias: d filterExpr: (value = 40) (type: boolean) @@ -710,12 +1304,34 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: 3 + tag: 1 value expressions: _col1 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col4,_col5,_col8,_col9 + columns.types int,int,boolean,int,int,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col4,_col5,_col8,_col9 + columns.types int,int,boolean,int,int,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition base file name: a_n4 @@ -765,27 +1381,21 @@ STAGE PLANS: name: default.a_n4 name: default.a_n4 Truncated Path -> Alias: - /a_n4 [$hdt$_0:a_n4, $hdt$_1:b, $hdt$_2:c, $hdt$_3:d] + /a_n4 [$hdt$_3:d] +#### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: Join Operator condition map: - Full Outer Join 0 to 1 - Left Outer Join 1 to 2 - Left Outer Join 0 to 3 + Left Outer Join 0 to 1 filter mappings: - 0 [1, 1, 3, 1] - 1 [0, 1, 2, 1] + 0 [1, 1] filter predicates: - 0 {VALUE._col2} {VALUE._col1} - 1 {VALUE._col2} {VALUE._col1} - 2 - 3 + 0 {VALUE._col1} + 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - 3 _col0 (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 Statistics: Num rows: 9 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -844,7 +1454,9 @@ POSTHOOK: Input: default@a_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -889,6 +1501,107 @@ STAGE PLANS: tag: 1 value expressions: _col1 (type: int) auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: a_n4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a_n4 + name: default.a_n4 + Truncated Path -> Alias: + /a_n4 [$hdt$_0:a_n4, $hdt$_1:b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + filter mappings: + 0 [1, 1] + filter predicates: + 0 {VALUE._col3} + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6 + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col5,_col6 + columns.types int,int,boolean,boolean,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean), _col5 (type: int), _col6 (type: int) + auto parallelism: false TableScan alias: c filterExpr: (value = 60) (type: boolean) @@ -908,9 +1621,133 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: 2 + tag: 1 value expressions: _col1 (type: int) auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col5,_col6 + columns.types int,int,boolean,boolean,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col5,_col6 + columns.types int,int,boolean,boolean,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: a_n4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.a_n4 + numFiles 1 + numRows 3 + rawDataSize 18 + serialization.ddl struct a_n4 { i32 key, i32 value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 21 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.a_n4 + name: default.a_n4 + Truncated Path -> Alias: + /a_n4 [$hdt$_2:c] +#### A masked pattern was here #### + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + filter mappings: + 0 [1, 1] + filter predicates: + 0 {VALUE._col2} + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col5,_col6,_col7,_col8 + columns.types int,int,boolean,int,int,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col1 (type: int), _col2 (type: boolean), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) + auto parallelism: false TableScan alias: d filterExpr: (value = 40) (type: boolean) @@ -930,12 +1767,34 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: 3 + tag: 1 value expressions: _col1 (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col5,_col6,_col7,_col8 + columns.types int,int,boolean,int,int,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col5,_col6,_col7,_col8 + columns.types int,int,boolean,int,int,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition base file name: a_n4 @@ -985,26 +1844,21 @@ STAGE PLANS: name: default.a_n4 name: default.a_n4 Truncated Path -> Alias: - /a_n4 [$hdt$_0:a_n4, $hdt$_1:b, $hdt$_2:c, $hdt$_3:d] + /a_n4 [$hdt$_3:d] +#### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 - Left Outer Join 0 to 3 filter mappings: - 0 [1, 1, 2, 1, 3, 1] + 0 [1, 1] filter predicates: - 0 {VALUE._col3} {VALUE._col2} {VALUE._col1} + 0 {VALUE._col1} 1 - 2 - 3 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - 3 _col0 (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 3 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git a/ql/src/test/results/clientpositive/join_grp_diff_keys.q.out b/ql/src/test/results/clientpositive/join_grp_diff_keys.q.out index 11ca700028..fc71d78b00 100644 --- a/ql/src/test/results/clientpositive/join_grp_diff_keys.q.out +++ b/ql/src/test/results/clientpositive/join_grp_diff_keys.q.out @@ -59,7 +59,9 @@ POSTHOOK: Input: default@split STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -97,6 +99,31 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE TableScan alias: split filterExpr: (id is not null and line_id is not null) (type: boolean) @@ -113,6 +140,31 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE TableScan alias: forecast filterExpr: (id is not null and line_id is not null) (type: boolean) @@ -133,21 +185,17 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) - 2 _col0 (type: int), _col1 (type: int) - 3 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 633 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 633 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -155,7 +203,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -163,7 +211,7 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 633 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -172,10 +220,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out index 6ee7ba8ed4..c06a966b69 100644 --- a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out +++ b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out @@ -19,7 +19,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -28,66 +29,89 @@ STAGE PLANS: TableScan alias: a filterExpr: (key is not null and hr is not null) (type: boolean) - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: string), hr (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL TableScan alias: b filterExpr: (key is not null and hr is not null) (type: boolean) - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: string), hr (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12658 Data size: 3430318 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 12658 Data size: 3430318 Basic stats: COMPLETE Column stats: PARTIAL TableScan alias: c filterExpr: (hr is not null and key is not null) (type: boolean) - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: string), hr (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 542000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - 2 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 80115 Data size: 640920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 80113 Data size: 640904 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false table: @@ -95,13 +119,13 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -109,10 +133,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join_reorder2.q.out b/ql/src/test/results/clientpositive/join_reorder2.q.out index cdae026816..927ae7a75f 100644 --- a/ql/src/test/results/clientpositive/join_reorder2.q.out +++ b/ql/src/test/results/clientpositive/join_reorder2.q.out @@ -86,7 +86,9 @@ POSTHOOK: Input: default@t4_n3 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -118,6 +120,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) TableScan alias: c filterExpr: key is not null (type: boolean) @@ -131,6 +159,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col11 (type: string) TableScan alias: d filterExpr: key is not null (type: boolean) @@ -148,22 +202,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 keys: - 0 key (type: string) + 0 _col10 (type: string) 1 key (type: string) - 2 key (type: string) - 3 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join_reorder3.q.out b/ql/src/test/results/clientpositive/join_reorder3.q.out index f173eafc1f..fc35b69ecc 100644 --- a/ql/src/test/results/clientpositive/join_reorder3.q.out +++ b/ql/src/test/results/clientpositive/join_reorder3.q.out @@ -86,7 +86,9 @@ POSTHOOK: Input: default@t4_n10 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -118,6 +120,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) TableScan alias: c filterExpr: key is not null (type: boolean) @@ -131,6 +159,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col11 (type: string) TableScan alias: d filterExpr: key is not null (type: boolean) @@ -148,22 +202,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 keys: - 0 key (type: string) + 0 _col10 (type: string) 1 key (type: string) - 2 key (type: string) - 3 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join_reorder4.q.out b/ql/src/test/results/clientpositive/join_reorder4.q.out index b7d5a2c3b6..d99526ad04 100644 --- a/ql/src/test/results/clientpositive/join_reorder4.q.out +++ b/ql/src/test/results/clientpositive/join_reorder4.q.out @@ -60,7 +60,8 @@ POSTHOOK: Input: default@t3_n32 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -92,6 +93,32 @@ STAGE PLANS: Map-reduce partition columns: key2 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key1 (type: string) + 1 key2 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: c filterExpr: key3 is not null (type: boolean) @@ -109,20 +136,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) + 0 _col0 (type: string) + 1 key3 (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -161,7 +186,8 @@ POSTHOOK: Input: default@t3_n32 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -193,6 +219,32 @@ STAGE PLANS: Map-reduce partition columns: key2 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key1 (type: string) + 1 key2 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: c filterExpr: key3 is not null (type: boolean) @@ -210,20 +262,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) + 0 _col0 (type: string) + 1 key3 (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -262,7 +312,8 @@ POSTHOOK: Input: default@t3_n32 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -294,6 +345,32 @@ STAGE PLANS: Map-reduce partition columns: key2 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key1 (type: string) + 1 key2 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: c filterExpr: key3 is not null (type: boolean) @@ -311,20 +388,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) + 0 _col0 (type: string) + 1 key3 (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/keep_uniform.q.out b/ql/src/test/results/clientpositive/keep_uniform.q.out index f509bf4fc7..959696f67e 100644 --- a/ql/src/test/results/clientpositive/keep_uniform.q.out +++ b/ql/src/test/results/clientpositive/keep_uniform.q.out @@ -431,17 +431,18 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2, Stage-11, Stage-14 - Stage-4 depends on stages: Stage-3 + Stage-3 depends on stages: Stage-2, Stage-12 + Stage-4 depends on stages: Stage-3, Stage-15 Stage-5 depends on stages: Stage-4 Stage-6 depends on stages: Stage-5 Stage-7 depends on stages: Stage-6 - Stage-10 is a root stage - Stage-11 depends on stages: Stage-10 - Stage-15 is a root stage - Stage-13 depends on stages: Stage-15 - Stage-14 depends on stages: Stage-13 - Stage-0 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-7 + Stage-11 is a root stage + Stage-12 depends on stages: Stage-11 + Stage-16 is a root stage + Stage-14 depends on stages: Stage-16 + Stage-15 depends on stages: Stage-14 + Stage-0 depends on stages: Stage-8 STAGE PLANS: Stage: Stage-1 @@ -568,6 +569,39 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 319 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 319 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) TableScan Reduce Output Operator key expressions: _col0 (type: int) @@ -585,13 +619,11 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col3 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -599,7 +631,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -607,7 +639,7 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) TableScan alias: date_dim @@ -640,13 +672,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4), sum(_col5) keys: _col3 (type: int) mode: hash outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -654,7 +686,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -670,7 +702,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) Execution mode: vectorized Map Vectorization: @@ -698,7 +730,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), sum(_col1), sum(_col2) mode: partial2 @@ -711,7 +743,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -764,7 +796,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -818,7 +850,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -890,7 +922,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -940,7 +972,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-15 + Stage: Stage-16 Map Reduce Map Operator Tree: TableScan @@ -1007,7 +1039,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -1060,7 +1092,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 + Stage: Stage-15 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/llap/auto_join21.q.out b/ql/src/test/results/clientpositive/llap/auto_join21.q.out index f2af6d801a..c12ff877b2 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join21.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join21.q.out @@ -17,8 +17,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -26,15 +27,28 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {(key < 10)} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -51,54 +65,44 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 1 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 1 to 2 - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 {(KEY.reducesinkkey0 < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/auto_join29.q.out b/ql/src/test/results/clientpositive/llap/auto_join29.q.out index 9e4ada1582..f7ae23b906 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join29.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join29.q.out @@ -17,8 +17,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -26,15 +27,28 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {(key < 10)} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -51,54 +65,44 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 1 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 1 to 2 - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 {(KEY.reducesinkkey0 < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 687326 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -637,8 +641,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -646,15 +650,40 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {(key < 10)} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 4 + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -671,58 +700,33 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 1 to 2 - filter predicates: - 0 {(KEY.reducesinkkey0 < 10)} - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1261,8 +1265,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1282,20 +1286,45 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(key > 10)} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 4 + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -1312,41 +1341,16 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - Left Outer Join 1 to 2 - filter predicates: - 0 - 1 {(KEY.reducesinkkey0 > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 262 Data size: 139908 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 262 Data size: 139908 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 262 Data size: 139908 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 262 Data size: 139908 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 262 Data size: 139908 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 318086 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1897,8 +1901,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Map 3 <- Map 2 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1918,67 +1923,70 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: src2 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(key > 10)} + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 2 + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - Right Outer Join 1 to 2 - filter predicates: - 0 - 1 {(KEY.reducesinkkey0 > 10)} - 2 {(KEY.reducesinkkey0 < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 415 Data size: 221610 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1705 Data size: 643826 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2529,8 +2537,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2542,15 +2550,37 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 4 + Statistics: Num rows: 110 Data size: 49128 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 110 Data size: 49128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 110 Data size: 49128 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -2567,54 +2597,33 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join 1 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 49128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 49128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2653,8 +2662,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2666,15 +2676,25 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -2691,54 +2711,44 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(key < 10)} + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 1 + Statistics: Num rows: 587 Data size: 135814 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 587 Data size: 135814 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 587 Data size: 135814 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join 1 to 2 - filter predicates: - 0 - 1 - 2 {(KEY.reducesinkkey0 < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 587 Data size: 135814 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 587 Data size: 135814 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3289,44 +3299,53 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Inner Join 1 to 2 filter predicates: 0 {(key < 10)} 1 - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 762 Data size: 182450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col5 < 10) (type: boolean) + Statistics: Num rows: 254 Data size: 60876 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 4 + Statistics: Num rows: 254 Data size: 106088 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 254 Data size: 106088 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 254 Data size: 106088 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: src2 - filterExpr: ((key < 10) and (key > 10)) (type: boolean) + filterExpr: (key > 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key > 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -3334,16 +3353,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -3353,10 +3372,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 254 Data size: 106088 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 87 Data size: 46458 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 254 Data size: 106088 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3428,28 +3447,34 @@ STAGE PLANS: Map Join Operator condition map: Right Outer Join 0 to 1 - Inner Join 1 to 2 filter predicates: 0 1 {(key > 10)} - 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 4 Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 166 Data size: 88644 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 4 @@ -3539,8 +3564,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3555,27 +3581,22 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 2 + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 55 Data size: 19580 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -3592,24 +3613,36 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: src3 - filterExpr: ((key > 10) and (key < 10)) (type: boolean) + filterExpr: (key < 10) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: value (type: string) + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 0 Map 1 + Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Statistics: Num rows: 55 Data size: 29370 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/auto_join30.q.out b/ql/src/test/results/clientpositive/llap/auto_join30.q.out index ec58793b4a..0167e335ed 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join30.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join30.q.out @@ -442,25 +442,32 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 + outputColumnNames: _col0, _col2, _col3 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 @@ -565,44 +572,75 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: src + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -618,28 +656,6 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: @@ -722,8 +738,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -735,14 +751,38 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -759,7 +799,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -776,28 +816,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -879,8 +897,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -892,14 +911,25 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: src @@ -916,7 +946,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: src @@ -925,36 +955,28 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1036,8 +1058,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Map 3 <- Map 2 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1056,7 +1079,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: src @@ -1065,15 +1088,25 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: src @@ -1082,36 +1115,28 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 2 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out index 7d792ac430..df8af5d894 100644 --- a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out @@ -1342,7 +1342,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1370,6 +1371,21 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + Map 5 Map Operator Tree: TableScan alias: a @@ -1382,26 +1398,33 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: llap + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out index 72734089dc..8224ab47e6 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out @@ -1029,7 +1029,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 2 @@ -1099,7 +1100,7 @@ STAGE PLANS: /bucket_small_n11/ds=2008-04-08 [a] Map Operator Tree: TableScan - alias: c + alias: b filterExpr: key is not null (type: boolean) Statistics: Num rows: 240 Data size: 158376 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -1107,6 +1108,24 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 228 Data size: 150457 Basic stats: PARTIAL Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0 + Position of Big Table: 1 + Statistics: Num rows: 250 Data size: 165502 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 165502 Basic stats: PARTIAL Column stats: NONE + tag: 0 + auto parallelism: true + Execution mode: llap Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1209,11 +1228,12 @@ STAGE PLANS: name: default.bucket_big_n11 name: default.bucket_big_n11 Truncated Path -> Alias: - /bucket_big_n11/ds=2008-04-08 [c] - /bucket_big_n11/ds=2008-04-09 [c] + /bucket_big_n11/ds=2008-04-08 [b] + /bucket_big_n11/ds=2008-04-09 [b] + Map 5 Map Operator Tree: TableScan - alias: b + alias: c filterExpr: key is not null (type: boolean) Statistics: Num rows: 240 Data size: 158376 Basic stats: PARTIAL Column stats: NONE GatherStats: false @@ -1221,34 +1241,22 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 228 Data size: 150457 Basic stats: PARTIAL Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 1 - Statistics: Num rows: 501 Data size: 331005 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap + Reduce Output Operator + key expressions: key (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 228 Data size: 150457 Basic stats: PARTIAL Column stats: NONE + tag: 1 + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1298,6 +1306,7 @@ STAGE PLANS: name: default.bucket_big_n11 #### A masked pattern was here #### Partition + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1346,9 +1355,33 @@ STAGE PLANS: name: default.bucket_big_n11 name: default.bucket_big_n11 Truncated Path -> Alias: - /bucket_big_n11/ds=2008-04-08 [b] - /bucket_big_n11/ds=2008-04-09 [b] + /bucket_big_n11/ds=2008-04-08 [c] + /bucket_big_n11/ds=2008-04-09 [c] Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + Position of Big Table: 0 + Statistics: Num rows: 275 Data size: 182052 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 4 Execution mode: vectorized, llap Needs Tagging: false Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out index 79620126a8..183e31edff 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out @@ -134,7 +134,7 @@ POSTHOOK: query: load data local inpath '../../data/files/auto_sortmerge_join/sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Shuffle Join MERGEJOIN[54][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small_n15 a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big_n15 c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big_n15 @@ -176,8 +176,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 6 (XPROD_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -251,18 +252,36 @@ STAGE PLANS: /bucket_medium/ds=2008-04-08 [b] Map Operator Tree: TableScan - alias: a + alias: c filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 368 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 240 Data size: 39008 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 368 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 211 Data size: 34294 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 211 Data size: 34294 Basic stats: PARTIAL Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Position of Big Table: 0 + Statistics: Num rows: 232 Data size: 37723 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 232 Data size: 37723 Basic stats: PARTIAL Column stats: NONE + tag: 0 + auto parallelism: true + Execution mode: llap Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -273,23 +292,23 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small_n15 - numFiles 2 + name default.bucket_big_n15 + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small_n15 { string key, string value} + serialization.ddl struct bucket_big_n15 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -297,7 +316,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key bucketing_version 2 column.name.delimiter , @@ -305,58 +324,22 @@ STAGE PLANS: columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small_n15 + name default.bucket_big_n15 partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small_n15 { string key, string value} + serialization.ddl struct bucket_big_n15 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small_n15 - name: default.bucket_small_n15 - Truncated Path -> Alias: - /bucket_small_n15/ds=2008-04-08 [a] - Map Operator Tree: - TableScan - alias: c - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 240 Data size: 39008 Basic stats: PARTIAL Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 211 Data size: 34294 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 211 Data size: 34294 Basic stats: PARTIAL Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Position of Big Table: 0 - Statistics: Num rows: 464 Data size: 75446 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 464 Data size: 75446 Basic stats: PARTIAL Column stats: NONE - tag: 0 - auto parallelism: false - Execution mode: llap - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_big_n15 + name: default.bucket_big_n15 #### A masked pattern was here #### Partition input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: bucket_count 4 bucket_field_name key @@ -400,30 +383,62 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big_n15 name: default.bucket_big_n15 + Truncated Path -> Alias: + /bucket_big_n15/ds=2008-04-08 [c] + /bucket_big_n15/ds=2008-04-09 [c] + Map 6 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 368 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 368 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: PARTIAL Column stats: NONE + tag: 1 + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big_n15 - numFiles 4 + name default.bucket_small_n15 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big_n15 { string key, string value} + serialization.ddl struct bucket_small_n15 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -431,7 +446,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key bucketing_version 2 column.name.delimiter , @@ -439,20 +454,19 @@ STAGE PLANS: columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big_n15 + name default.bucket_small_n15 partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big_n15 { string key, string value} + serialization.ddl struct bucket_small_n15 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big_n15 - name: default.bucket_big_n15 + name: default.bucket_small_n15 + name: default.bucket_small_n15 Truncated Path -> Alias: - /bucket_big_n15/ds=2008-04-08 [c] - /bucket_big_n15/ds=2008-04-09 [c] - Map 6 + /bucket_small_n15/ds=2008-04-08 [a] + Map 7 Map Operator Tree: TableScan alias: d @@ -524,6 +538,24 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_medium/ds=2008-04-08 [d] Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 255 Data size: 41495 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 255 Data size: 41495 Basic stats: PARTIAL Column stats: NONE + tag: 0 + auto parallelism: false + Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -534,7 +566,7 @@ STAGE PLANS: 0 1 Position of Big Table: 0 - Statistics: Num rows: 1392 Data size: 1027666 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 765 Data size: 564870 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -547,7 +579,7 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Needs Tagging: false Reduce Operator Tree: @@ -585,7 +617,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[54][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select count(*) FROM bucket_small_n15 a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big_n15 c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big_n15 diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out index ebb824632d..c83f7991f4 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out @@ -700,7 +700,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -718,7 +719,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan - alias: c + alias: a filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -728,9 +729,24 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 825 Data size: 3300 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 825 Data size: 3300 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + Map 5 Map Operator Tree: TableScan - alias: a + alias: c filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -740,26 +756,33 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 1361 Data size: 10888 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: llap + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1361 Data size: 10888 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1267,7 +1290,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1285,7 +1309,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan - alias: c + alias: a filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1295,9 +1319,24 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 825 Data size: 3300 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 825 Data size: 3300 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + Map 5 Map Operator Tree: TableScan - alias: a + alias: c filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1307,26 +1346,33 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 1361 Data size: 10888 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: llap + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1361 Data size: 10888 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out index 4c5532f32c..ee83ca6108 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out @@ -1578,7 +1578,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1606,6 +1607,21 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + Map 5 Map Operator Tree: TableScan alias: a @@ -1618,26 +1634,33 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: llap + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3230,7 +3253,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3258,6 +3282,21 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + Map 5 Map Operator Tree: TableScan alias: a @@ -3270,26 +3309,33 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: llap + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out b/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out index d87e3a32fc..25b873eab0 100644 --- a/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketizedhiveinputformat.q.out @@ -22,7 +22,8 @@ POSTHOOK: query: CREATE TABLE T2_n74(name STRING) STORED AS SEQUENCEFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@T2_n74 -Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: INSERT OVERWRITE TABLE T2_n74 SELECT * FROM ( SELECT tmp1.name as name FROM ( SELECT name, 'MMM' AS n FROM T1_n125) tmp1 diff --git a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out index 646ee3af7d..f01baba292 100644 --- a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out @@ -753,7 +753,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -776,7 +777,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: t1_n127 @@ -796,7 +797,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: t3_n29 @@ -823,20 +824,34 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 11 Data size: 935 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 11 Data size: 935 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 935 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out index 3031f68c80..fcd6fd9752 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out @@ -700,10 +700,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -731,7 +732,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: x @@ -752,7 +753,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: x @@ -778,11 +779,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col2, _col3, _col4 Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -798,7 +814,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -813,7 +829,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -836,7 +852,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -906,10 +922,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -937,7 +954,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: x @@ -958,7 +975,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: x @@ -984,11 +1001,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col2, _col3, _col4 Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1004,7 +1036,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1019,7 +1051,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1042,7 +1074,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1112,10 +1144,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) - Map 4 <- Map 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 6 (BROADCAST_EDGE) + Reducer 4 <- Map 1 (BROADCAST_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1134,32 +1166,22 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3, _col4 + outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 - 2 Reducer 5 - Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col3) (type: int), hash(_col4) (type: int), hash(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + 1 Map 2 + Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: x @@ -1180,7 +1202,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: y @@ -1237,7 +1259,39 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3, _col4 + input vertices: + 0 Map 1 + Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col3) (type: int), hash(_col4) (type: int), hash(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1252,21 +1306,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out index 4a7704471e..928f2c73ff 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out @@ -75,9 +75,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -100,7 +101,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: y @@ -120,7 +121,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: z @@ -146,26 +147,40 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -187,7 +202,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -261,9 +276,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -295,7 +311,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 6 Map Operator Tree: TableScan alias: y @@ -324,7 +340,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 6 + Map 7 Map Operator Tree: TableScan alias: z @@ -359,29 +375,46 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -409,7 +442,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -510,28 +543,35 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col1 input vertices: 1 Map 4 - 2 Map 5 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -675,7 +715,7 @@ POSTHOOK: Input: default@t1_n146 POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### -13 10 +5 2 PREHOOK: query: EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt @@ -709,9 +749,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -739,7 +780,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 6 Map Operator Tree: TableScan alias: y @@ -764,7 +805,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 6 + Map 7 Map Operator Tree: TableScan alias: z @@ -795,29 +836,47 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -845,7 +904,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -925,9 +984,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -955,7 +1015,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 6 Map Operator Tree: TableScan alias: y @@ -980,7 +1040,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 6 + Map 7 Map Operator Tree: TableScan alias: z @@ -1011,29 +1071,47 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1061,7 +1139,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1141,9 +1219,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1171,7 +1250,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 6 Map Operator Tree: TableScan alias: y @@ -1196,7 +1275,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 6 + Map 7 Map Operator Tree: TableScan alias: z @@ -1227,29 +1306,46 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1277,7 +1373,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: enabled: true diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out index 93a3017696..247baf14af 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out @@ -1873,9 +1873,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1898,7 +1899,7 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -1923,7 +1924,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: y @@ -1949,11 +1950,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 39 Data size: 7059 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1967,7 +1983,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1990,7 +2006,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2100,9 +2116,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2125,7 +2142,7 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -2150,7 +2167,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: y @@ -2176,11 +2193,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 39 Data size: 7059 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2194,7 +2226,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -2217,7 +2249,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2327,9 +2359,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 7 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2357,7 +2390,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: y @@ -2377,7 +2410,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: xx @@ -2441,11 +2474,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 39 Data size: 7059 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2554,9 +2602,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 7 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2584,7 +2633,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: y @@ -2604,7 +2653,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: xx @@ -2668,11 +2717,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 39 Data size: 7059 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git a/ql/src/test/results/clientpositive/llap/cross_prod_3.q.out b/ql/src/test/results/clientpositive/llap/cross_prod_3.q.out index 9b2db3ab1c..8351da4c7b 100644 --- a/ql/src/test/results/clientpositive/llap/cross_prod_3.q.out +++ b/ql/src/test/results/clientpositive/llap/cross_prod_3.q.out @@ -49,7 +49,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE) + Map 1 <- Map 2 (CUSTOM_EDGE) + Map 3 <- Map 1 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -68,27 +69,19 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 - 2 Map 3 - Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 2 @@ -125,12 +118,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 4304d9ee56..74bb1a0d20 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -609,8 +609,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -633,7 +634,7 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -653,7 +654,7 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: alltypesorc_int_n1 @@ -679,12 +680,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col1 + Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -694,7 +709,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -766,11 +781,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Map 6 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Map 7 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -793,7 +809,7 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -826,7 +842,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: alltypesorc_int_n1 @@ -852,12 +868,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col1 + Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -867,7 +897,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -882,7 +912,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out index 2f58017df8..340714860b 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out @@ -456,20 +456,21 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart_date_n1 - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_11_srcpart_small_n0_key1_min) AND DynamicValue(RS_11_srcpart_small_n0_key1_max) and in_bloom_filter(key, DynamicValue(RS_11_srcpart_small_n0_key1_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_n0_key1_min) AND DynamicValue(RS_13_srcpart_small_n0_key1_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_n0_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key BETWEEN DynamicValue(RS_11_srcpart_small_n0_key1_min) AND DynamicValue(RS_11_srcpart_small_n0_key1_max) and in_bloom_filter(key, DynamicValue(RS_11_srcpart_small_n0_key1_bloom_filter))) and key is not null) (type: boolean) + predicate: ((key BETWEEN DynamicValue(RS_13_srcpart_small_n0_key1_min) AND DynamicValue(RS_13_srcpart_small_n0_key1_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_n0_key1_bloom_filter))) and key is not null) (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -482,7 +483,7 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: srcpart_medium_n0 @@ -502,7 +503,7 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: srcpart_small_n0 @@ -541,12 +542,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 4400 Data size: 382800 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 316 Data size: 27492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 316 Data size: 27492 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 347 Data size: 30241 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -556,7 +571,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -571,7 +586,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -848,10 +863,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -874,7 +890,7 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: srcpart_medium_n0 @@ -907,7 +923,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: srcpart_small_ext @@ -933,12 +949,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 4400 Data size: 382800 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 316 Data size: 27492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 316 Data size: 27492 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 347 Data size: 30241 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -948,7 +978,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -963,7 +993,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out index 3abf9bfcca..c77ebf701b 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out @@ -148,7 +148,7 @@ POSTHOOK: Output: default@srcpart_small_n2 POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08 POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[92][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[106][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: EXPLAIN SELECT count(*) FROM ( @@ -208,25 +208,27 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) - Map 10 <- Reducer 8 (BROADCAST_EDGE) - Map 9 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 10 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) - Reducer 8 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Map 11 <- Reducer 7 (BROADCAST_EDGE) + Map 12 <- Reducer 10 (BROADCAST_EDGE) + Reducer 10 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 9 <- Map 11 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart_date_n6 - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter))) and key is not null) (type: boolean) + predicate: ((key BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter))) and key is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -239,7 +241,32 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 10 + Map 11 + Map Operator Tree: + TableScan + alias: alltypesorc_int_n0 + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((cstring BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter))) and cstring is not null) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 12 Map Operator Tree: TableScan alias: srcpart_date_n6 @@ -259,7 +286,7 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: srcpart_small_n2 @@ -310,47 +337,48 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 9 - Map Operator Tree: - TableScan - alias: alltypesorc_int_n0 - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((cstring BETWEEN DynamicValue(RS_24_srcpart_small_n2_key1_min) AND DynamicValue(RS_24_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_24_srcpart_small_n2_key1_bloom_filter))) and cstring is not null) (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 10 Execution mode: vectorized, llap - LLAP IO: all inputs + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col1 + Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: PARTIAL Column stats: NONE + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -359,7 +387,7 @@ STAGE PLANS: keys: 0 1 - Statistics: Num rows: 407313124 Data size: 57586148244 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 101828281 Data size: 14396537061 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -369,7 +397,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -384,7 +412,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -400,33 +428,35 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col1 + Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: PARTIAL Column stats: NONE - Reducer 8 - Execution mode: vectorized, llap + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE + Reducer 9 + Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out index 3ecfb3e068..06f9e7dfed 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out @@ -340,50 +340,56 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_18] - Group By Operator [GBY_16] (rows=1 width=8) + Reducer 4 llap + File Output Operator [FS_20] + Group By Operator [GBY_18] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_15] - Group By Operator [GBY_14] (rows=1 width=8) + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_17] + Group By Operator [GBY_16] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_28] (rows=20182 width=70) - Conds:RS_9._col0=RS_10._col0(Inner),RS_10._col0=RS_11._col0(Inner) - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=2000 width=87) - Output:["_col0"] - Filter Operator [FIL_25] (rows=2000 width=87) - predicate:key is not null - TableScan [TS_0] (rows=2000 width=87) - default@srcpart_date_n9,srcpart_date_n9,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=20 width=87) - Output:["_col0"] - Filter Operator [FIL_26] (rows=20 width=87) - predicate:key1 is not null - TableScan [TS_3] (rows=20 width=87) - default@srcpart_small_n4,srcpart_small_n4,Tbl:PARTIAL,Col:PARTIAL,Output:["key1"] - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] + Merge Join Operator [MERGEJOIN_30] (rows=10091 width=70) + Conds:RS_12._col1=RS_13._col0(Inner) + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_8] (rows=9174 width=70) Output:["_col0"] - Filter Operator [FIL_27] (rows=9174 width=70) + Filter Operator [FIL_28] (rows=9174 width=70) predicate:cstring is not null TableScan [TS_6] (rows=12288 width=70) default@alltypesorc_int_n2,alltypesorc_int_n2,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_29] (rows=2200 width=87) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=2000 width=87) + Output:["_col0"] + Filter Operator [FIL_26] (rows=2000 width=87) + predicate:key is not null + TableScan [TS_0] (rows=2000 width=87) + default@srcpart_date_n9,srcpart_date_n9,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=20 width=87) + Output:["_col0"] + Filter Operator [FIL_27] (rows=20 width=87) + predicate:key1 is not null + TableScan [TS_3] (rows=20 width=87) + default@srcpart_small_n4,srcpart_small_n4,Tbl:PARTIAL,Col:PARTIAL,Output:["key1"] PREHOOK: query: select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_small_n4.key1 = alltypesorc_int_n2.cstring) PREHOOK: type: QUERY @@ -429,67 +435,73 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 5 (BROADCAST_EDGE) -Map 6 <- Reducer 5 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 6 (BROADCAST_EDGE) +Map 7 <- Reducer 6 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_18] - Group By Operator [GBY_16] (rows=1 width=8) + Reducer 4 llap + File Output Operator [FS_20] + Group By Operator [GBY_18] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_15] - Group By Operator [GBY_14] (rows=1 width=8) + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_17] + Group By Operator [GBY_16] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_46] (rows=20182 width=70) - Conds:RS_9._col0=RS_10._col0(Inner),RS_10._col0=RS_11._col0(Inner) - <-Map 4 [SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=20 width=87) - Output:["_col0"] - Filter Operator [FIL_26] (rows=20 width=87) - predicate:key1 is not null - TableScan [TS_3] (rows=20 width=87) - default@srcpart_small_n4,srcpart_small_n4,Tbl:PARTIAL,Col:PARTIAL,Output:["key1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=2000 width=87) - Output:["_col0"] - Filter Operator [FIL_25] (rows=2000 width=87) - predicate:((key BETWEEN DynamicValue(RS_10_srcpart_small_n4_key1_min) AND DynamicValue(RS_10_srcpart_small_n4_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n4_key1_bloom_filter))) and key is not null) - TableScan [TS_0] (rows=2000 width=87) - default@srcpart_date_n9,srcpart_date_n9,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 5 [BROADCAST_EDGE] llap - BROADCAST [RS_32] - Group By Operator [GBY_31] (rows=1 width=639) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20)"] - <-Map 4 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_30] - Group By Operator [GBY_29] (rows=1 width=639) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20)"] - Select Operator [SEL_28] (rows=20 width=87) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_5] - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] + Merge Join Operator [MERGEJOIN_53] (rows=10091 width=70) + Conds:RS_12._col1=RS_13._col0(Inner) + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_8] (rows=9174 width=70) Output:["_col0"] - Filter Operator [FIL_27] (rows=9174 width=70) + Filter Operator [FIL_28] (rows=9174 width=70) predicate:((cstring BETWEEN DynamicValue(RS_10_srcpart_small_n4_key1_min) AND DynamicValue(RS_10_srcpart_small_n4_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_n4_key1_bloom_filter))) and cstring is not null) TableScan [TS_6] (rows=12288 width=70) default@alltypesorc_int_n2,alltypesorc_int_n2,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] - <-Reducer 5 [BROADCAST_EDGE] llap - BROADCAST [RS_45] - Please refer to the previous Group By Operator [GBY_31] + <-Reducer 6 [BROADCAST_EDGE] llap + BROADCAST [RS_50] + Group By Operator [GBY_32] (rows=1 width=639) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_31] + Group By Operator [GBY_30] (rows=1 width=639) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20)"] + Select Operator [SEL_29] (rows=20 width=87) + Output:["_col0"] + Select Operator [SEL_5] (rows=20 width=87) + Output:["_col0"] + Filter Operator [FIL_27] (rows=20 width=87) + predicate:key1 is not null + TableScan [TS_3] (rows=20 width=87) + default@srcpart_small_n4,srcpart_small_n4,Tbl:PARTIAL,Col:PARTIAL,Output:["key1"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_52] (rows=2200 width=87) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1"] + <-Map 5 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_10] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_5] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=2000 width=87) + Output:["_col0"] + Filter Operator [FIL_26] (rows=2000 width=87) + predicate:((key BETWEEN DynamicValue(RS_10_srcpart_small_n4_key1_min) AND DynamicValue(RS_10_srcpart_small_n4_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n4_key1_bloom_filter))) and key is not null) + TableScan [TS_0] (rows=2000 width=87) + default@srcpart_date_n9,srcpart_date_n9,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Reducer 6 [BROADCAST_EDGE] llap + BROADCAST [RS_33] + Please refer to the previous Group By Operator [GBY_32] PREHOOK: query: select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_small_n4.key1 = alltypesorc_int_n2.cstring) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/empty_join.q.out b/ql/src/test/results/clientpositive/llap/empty_join.q.out index f65c8bdcd8..5b8020ffb8 100644 --- a/ql/src/test/results/clientpositive/llap/empty_join.q.out +++ b/ql/src/test/results/clientpositive/llap/empty_join.q.out @@ -70,33 +70,35 @@ Stage-0 limit:-1 Stage-1 Map 1 vectorized, llap - File Output Operator [FS_46] - Map Join Operator [MAPJOIN_45] (rows=2 width=4) - Conds:SEL_44._col0=RS_39._col0(Left Outer),SEL_44._col0=RS_42._col0(Inner),Output:["_col0","_col1","_col2"] - <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_39] - PartitionCols:_col0 - Select Operator [SEL_38] (rows=1 width=4) - Output:["_col0"] - Filter Operator [FIL_37] (rows=1 width=4) - predicate:id is not null - TableScan [TS_3] (rows=1 width=4) - default@test_2_n2,t2,Tbl:COMPLETE,Col:NONE,Output:["id"] + File Output Operator [FS_55] + Map Join Operator [MAPJOIN_54] (rows=1 width=4) + Conds:MAPJOIN_53._col0=RS_50._col0(Inner),Output:["_col0","_col1","_col2"] <-Map 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_42] + BROADCAST [RS_50] PartitionCols:_col0 - Select Operator [SEL_41] (rows=1 width=4) + Select Operator [SEL_49] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_40] (rows=1 width=4) + Filter Operator [FIL_48] (rows=1 width=4) predicate:id is not null TableScan [TS_6] (rows=1 width=4) default@test_3_n0,t3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] - <-Select Operator [SEL_44] (rows=1 width=4) - Output:["_col0"] - Filter Operator [FIL_43] (rows=1 width=4) - predicate:id is not null - TableScan [TS_0] (rows=1 width=4) - default@test_1_n2,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] + <-Map Join Operator [MAPJOIN_53] (rows=1 width=4) + Conds:SEL_52._col0=RS_47._col0(Left Outer),Output:["_col0","_col1"] + <-Map 2 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_47] + PartitionCols:_col0 + Select Operator [SEL_46] (rows=1 width=4) + Output:["_col0"] + Filter Operator [FIL_45] (rows=1 width=4) + predicate:id is not null + TableScan [TS_3] (rows=1 width=4) + default@test_2_n2,t2,Tbl:COMPLETE,Col:NONE,Output:["id"] + <-Select Operator [SEL_52] (rows=1 width=4) + Output:["_col0"] + Filter Operator [FIL_51] (rows=1 width=4) + predicate:id is not null + TableScan [TS_0] (rows=1 width=4) + default@test_1_n2,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: SELECT t1.id, t2.id, t3.id FROM test_1_n2 t1 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 2bb6352a7e..1ee459b5d6 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -433,73 +433,79 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_29] - Select Operator [SEL_27] (rows=1 width=20) + Reducer 5 llap + File Output Operator [FS_31] + Select Operator [SEL_29] (rows=1 width=20) Output:["_col0","_col1","_col2"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_26] - Select Operator [SEL_25] (rows=1 width=28) + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_28] + Select Operator [SEL_27] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_24] (rows=1 width=20) + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col5, _col1 - Select Operator [SEL_21] (rows=1 width=24) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col5"] - Merge Join Operator [MERGEJOIN_57] (rows=1 width=24) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col7"],residual filter predicates:{((_col4 > 0) or _col2)} {((_col4 + _col7) >= 0)} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_36] (rows=18 width=84) - predicate:key is not null - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=2 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_37] (rows=5 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + Merge Join Operator [MERGEJOIN_64] (rows=1 width=20) + Conds:RS_20._col3=RS_21._col0(Inner),Output:["_col1","_col4","_col5","_col7"],residual filter predicates:{((_col4 + _col7) >= 0)} + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_20] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_63] (rows=5 width=104) + Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col4 > 0) or _col2)} + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=18 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_37] (rows=18 width=84) + predicate:key is not null + TableScan [TS_0] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=2 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_8] (rows=2 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=2 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_38] (rows=5 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] PartitionCols:_col0 Select Operator [SEL_16] (rows=2 width=89) Output:["_col0","_col1"] Group By Operator [GBY_15] (rows=2 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap + <-Map 8 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_13] (rows=2 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_38] (rows=5 width=93) + Filter Operator [FIL_39] (rows=5 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -523,73 +529,79 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_29] - Select Operator [SEL_27] (rows=1 width=20) + Reducer 5 llap + File Output Operator [FS_31] + Select Operator [SEL_29] (rows=1 width=20) Output:["_col0","_col1","_col2"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_26] - Select Operator [SEL_25] (rows=1 width=28) + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_28] + Select Operator [SEL_27] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_24] (rows=1 width=20) + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col7 - Select Operator [SEL_21] (rows=1 width=36) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col7"] - Merge Join Operator [MERGEJOIN_57] (rows=1 width=36) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0)} {((_col6 + _col9) >= 0)} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=99) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_36] (rows=18 width=84) - predicate:((c_int > 0) and key is not null) - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_37] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + Merge Join Operator [MERGEJOIN_64] (rows=1 width=20) + Conds:RS_20._col5=RS_21._col0(Inner),Output:["_col1","_col6","_col7","_col9"],residual filter predicates:{((_col6 + _col9) >= 0)} + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_20] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_63] (rows=1 width=117) + Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0)} + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=18 width=99) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_37] (rows=18 width=84) + predicate:((c_int > 0) and key is not null) + TableScan [TS_0] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_38] (rows=2 width=93) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] PartitionCols:_col0 Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap + <-Map 8 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_38] (rows=2 width=93) + Filter Operator [FIL_39] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -613,66 +625,72 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 5 <- Map 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_26] - Group By Operator [GBY_24] (rows=1 width=20) + Reducer 4 llap + File Output Operator [FS_28] + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col5 - Select Operator [SEL_21] (rows=1 width=24) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col5"] - Merge Join Operator [MERGEJOIN_54] (rows=1 width=24) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col7"],residual filter predicates:{((_col4 > 0) or _col2)} {((_col4 + _col7) >= 2)} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_33] (rows=18 width=84) - predicate:key is not null - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_34] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + Merge Join Operator [MERGEJOIN_61] (rows=1 width=20) + Conds:RS_20._col3=RS_21._col0(Inner),Output:["_col1","_col4","_col5","_col7"],residual filter predicates:{((_col4 + _col7) >= 2)} + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_20] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_60] (rows=2 width=105) + Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col4 > 0) or _col2)} + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=18 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_34] (rows=18 width=84) + predicate:key is not null + TableScan [TS_0] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_35] (rows=2 width=93) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] PartitionCols:_col0 Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 6 [SIMPLE_EDGE] llap + <-Map 7 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_35] (rows=2 width=93) + Filter Operator [FIL_36] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -696,71 +714,77 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_28] - Select Operator [SEL_27] (rows=1 width=20) + Reducer 5 llap + File Output Operator [FS_30] + Select Operator [SEL_29] (rows=1 width=20) Output:["_col0","_col1","_col2"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_26] - Group By Operator [GBY_24] (rows=1 width=20) + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_28] + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col7 - Select Operator [SEL_21] (rows=1 width=36) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col7"] - Merge Join Operator [MERGEJOIN_56] (rows=1 width=36) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0)} {((_col6 + _col9) >= 0)} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=99) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_35] (rows=18 width=84) - predicate:((c_int > 0) and key is not null) - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_36] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + Merge Join Operator [MERGEJOIN_63] (rows=1 width=20) + Conds:RS_20._col5=RS_21._col0(Inner),Output:["_col1","_col6","_col7","_col9"],residual filter predicates:{((_col6 + _col9) >= 0)} + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_20] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_62] (rows=1 width=117) + Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0)} + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=18 width=99) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_36] (rows=18 width=84) + predicate:((c_int > 0) and key is not null) + TableScan [TS_0] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_37] (rows=2 width=93) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] PartitionCols:_col0 Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap + <-Map 8 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_37] (rows=2 width=93) + Filter Operator [FIL_38] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -784,66 +808,72 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 5 <- Map 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_26] - Group By Operator [GBY_24] (rows=1 width=20) + Reducer 4 llap + File Output Operator [FS_28] + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col5 - Select Operator [SEL_21] (rows=1 width=24) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col5"] - Merge Join Operator [MERGEJOIN_54] (rows=1 width=24) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col7"],residual filter predicates:{((_col4 > 0) or _col2)} {((_col4 + _col7) >= 0)} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_33] (rows=18 width=84) - predicate:key is not null - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_34] (rows=2 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + Merge Join Operator [MERGEJOIN_61] (rows=1 width=20) + Conds:RS_20._col3=RS_21._col0(Inner),Output:["_col1","_col4","_col5","_col7"],residual filter predicates:{((_col4 + _col7) >= 0)} + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_20] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_60] (rows=2 width=105) + Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col4 > 0) or _col2)} + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=18 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_34] (rows=18 width=84) + predicate:key is not null + TableScan [TS_0] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_35] (rows=2 width=93) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] PartitionCols:_col0 Select Operator [SEL_16] (rows=1 width=89) Output:["_col0","_col1"] Group By Operator [GBY_15] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 6 [SIMPLE_EDGE] llap + <-Map 7 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_13] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_35] (rows=2 width=93) + Filter Operator [FIL_36] (rows=2 width=93) predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (c_float > 0.0) and key is not null) TableScan [TS_10] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1179,45 +1209,51 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_14] - Select Operator [SEL_13] (rows=194 width=100) + Reducer 3 llap + File Output Operator [FS_16] + Select Operator [SEL_15] (rows=194 width=100) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_42] (rows=194 width=100) - Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=87) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_21] (rows=18 width=87) - predicate:key is not null - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=18 width=84) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=18 width=84) - predicate:key is not null - TableScan [TS_3] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] + Merge Join Operator [MERGEJOIN_49] (rows=194 width=100) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_8] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=18 width=84) + Filter Operator [FIL_24] (rows=18 width=84) predicate:key is not null TableScan [TS_6] (rows=20 width=84) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_48] (rows=54 width=95) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=18 width=87) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_22] (rows=18 width=87) + predicate:key is not null + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=18 width=84) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=18 width=84) + predicate:key is not null + TableScan [TS_3] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] PREHOOK: query: explain select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a PREHOOK: type: QUERY @@ -1238,45 +1274,51 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_14] - Select Operator [SEL_13] (rows=194 width=177) + Reducer 3 llap + File Output Operator [FS_16] + Select Operator [SEL_15] (rows=194 width=177) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_42] (rows=194 width=177) - Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col0","_col1","_col3","_col4"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=84) - Output:["_col0","_col1"] - Filter Operator [FIL_21] (rows=18 width=84) - predicate:key is not null - TableScan [TS_0] (rows=20 width=84) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=18 width=80) - Output:["_col0"] - Filter Operator [FIL_22] (rows=18 width=80) - predicate:key is not null - TableScan [TS_3] (rows=20 width=80) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] + Merge Join Operator [MERGEJOIN_49] (rows=194 width=177) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col3","_col4"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_8] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=18 width=84) + Filter Operator [FIL_24] (rows=18 width=84) predicate:key is not null TableScan [TS_6] (rows=20 width=84) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_48] (rows=54 width=87) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=18 width=84) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=18 width=84) + predicate:key is not null + TableScan [TS_0] (rows=20 width=84) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=18 width=80) + Output:["_col0"] + Filter Operator [FIL_23] (rows=18 width=80) + predicate:key is not null + TableScan [TS_3] (rows=20 width=80) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] PREHOOK: query: explain select * from (select q, b, cbo_t2.p, cbo_t1.c, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q == 2) and (b > 0 or c_int >= 0)) R where (q + 1 = 2) and (R.b > 0 or c_int >= 0) PREHOOK: type: QUERY @@ -1297,45 +1339,51 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_14] - Select Operator [SEL_13] (rows=24 width=101) + Reducer 3 llap + File Output Operator [FS_16] + Select Operator [SEL_15] (rows=24 width=100) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_42] (rows=24 width=105) - Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7"],residual filter predicates:{((_col1 + _col4) = 2)} {((_col1 > 0) or _col7)} {((_col1 > 0) or (_col6 >= 0))} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_21] (rows=9 width=93) - predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=9 width=89) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=9 width=93) - predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] + Merge Join Operator [MERGEJOIN_49] (rows=24 width=104) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7"],residual filter predicates:{((_col1 > 0) or _col7)} {((_col1 > 0) or (_col6 >= 0))} + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_8] (rows=18 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_23] (rows=18 width=84) + Filter Operator [FIL_24] (rows=18 width=84) predicate:key is not null TableScan [TS_6] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_48] (rows=8 width=182) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"],residual filter predicates:{((_col1 + _col4) = 2)} + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=9 width=93) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_22] (rows=9 width=93) + predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=9 width=89) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=9 width=93) + predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select * from (select q, b, cbo_t2.p, cbo_t1.c, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 right outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q == 2) and (b > 0 or c_int >= 0)) R where (q + 1 = 2) and (R.b > 0 or c_int >= 0) PREHOOK: type: QUERY @@ -1356,45 +1404,51 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_14] - Select Operator [SEL_13] (rows=24 width=101) + Reducer 3 llap + File Output Operator [FS_16] + Select Operator [SEL_15] (rows=24 width=100) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_42] (rows=24 width=105) - Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7"],residual filter predicates:{((_col1 + _col4) = 2)} {((_col1 > 0) or _col7)} {((_col1 > 0) or (_col6 >= 0))} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_21] (rows=9 width=93) - predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=9 width=89) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=9 width=93) - predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] + Merge Join Operator [MERGEJOIN_49] (rows=24 width=104) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7"],residual filter predicates:{((_col1 > 0) or _col7)} {((_col1 > 0) or (_col6 >= 0))} + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] PartitionCols:_col0 Select Operator [SEL_8] (rows=18 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_23] (rows=18 width=84) + Filter Operator [FIL_24] (rows=18 width=84) predicate:key is not null TableScan [TS_6] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_12] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_48] (rows=8 width=182) + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"],residual filter predicates:{((_col1 + _col4) = 2)} + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=9 width=93) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_22] (rows=9 width=93) + predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=9 width=89) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=9 width=93) + predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key order by x limit 1 PREHOOK: type: QUERY @@ -1577,100 +1631,106 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 llap - File Output Operator [FS_42] - Limit [LIM_40] (rows=1 width=28) + Reducer 7 llap + File Output Operator [FS_44] + Limit [LIM_42] (rows=1 width=28) Number of rows:5 - Select Operator [SEL_39] (rows=1 width=28) + Select Operator [SEL_41] (rows=1 width=28) Output:["_col0","_col1","_col2"] - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_38] - Select Operator [SEL_37] (rows=1 width=28) + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_40] + Select Operator [SEL_39] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_36] (rows=1 width=20) + Group By Operator [GBY_38] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_35] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_37] PartitionCols:_col0, _col1 - Group By Operator [GBY_34] (rows=1 width=20) + Group By Operator [GBY_36] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col7 - Select Operator [SEL_33] (rows=2 width=28) + Select Operator [SEL_35] (rows=2 width=20) Output:["_col4","_col7"] - Merge Join Operator [MERGEJOIN_65] (rows=2 width=28) - Conds:RS_29._col0=RS_30._col0(Inner),RS_30._col0=RS_31._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8"],residual filter predicates:{((_col3 + _col1) >= 0)} {(_col5 or _col8)} - <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] + Merge Join Operator [MERGEJOIN_72] (rows=2 width=20) + Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col4","_col5","_col7","_col8"],residual filter predicates:{(_col5 or _col8)} + <-Map 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_33] PartitionCols:_col0 Select Operator [SEL_28] (rows=18 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_53] (rows=18 width=84) + Filter Operator [FIL_54] (rows=18 width=84) predicate:key is not null TableScan [TS_26] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_29] - PartitionCols:_col0 - Filter Operator [FIL_11] (rows=2 width=105) - predicate:_col0 is not null - Limit [LIM_9] (rows=3 width=105) - Number of rows:5 - Select Operator [SEL_8] (rows=3 width=105) - Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - Select Operator [SEL_6] (rows=3 width=105) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_5] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_50] (rows=6 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_30] - PartitionCols:_col0 - Select Operator [SEL_25] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_24] (rows=2 width=97) - predicate:_col0 is not null - Limit [LIM_22] (rows=3 width=97) - Number of rows:5 - Select Operator [SEL_21] (rows=3 width=97) - Output:["_col0","_col1","_col2"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] - Select Operator [SEL_19] (rows=3 width=97) + <-Reducer 4 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_32] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_71] (rows=1 width=105) + Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col3 + _col1) >= 0)} + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_25] (rows=2 width=101) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_24] (rows=2 width=97) + predicate:_col0 is not null + Limit [LIM_22] (rows=3 width=97) + Number of rows:5 + Select Operator [SEL_21] (rows=3 width=97) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_18] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_16] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Top N Key Operator [TNK_54] (rows=6 width=93) - keys:key, c_int, c_float,sort order:+++,top n:5 - Filter Operator [FIL_52] (rows=6 width=93) + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] + Select Operator [SEL_19] (rows=3 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_18] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_16] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Top N Key Operator [TNK_55] (rows=6 width=93) + keys:key, c_int, c_float,sort order:+++,top n:5 + Filter Operator [FIL_53] (rows=6 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) + TableScan [TS_13] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_29] + PartitionCols:_col0 + Filter Operator [FIL_11] (rows=2 width=105) + predicate:_col0 is not null + Limit [LIM_9] (rows=3 width=105) + Number of rows:5 + Select Operator [SEL_8] (rows=3 width=105) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + Select Operator [SEL_6] (rows=3 width=105) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_5] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_3] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_51] (rows=6 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) - TableScan [TS_13] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + TableScan [TS_0] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) PREHOOK: type: QUERY @@ -1741,49 +1801,55 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_18] - Select Operator [SEL_17] (rows=10 width=93) + Reducer 3 llap + File Output Operator [FS_20] + Select Operator [SEL_19] (rows=9 width=93) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_46] (rows=10 width=93) - Conds:RS_13._col0=RS_14._col0(Left Semi),RS_13._col0=RS_15._col0(Left Semi),Output:["_col0","_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_25] (rows=9 width=93) - predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_10] (rows=4 width=85) - Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=9 width=85) - Output:["_col0"] - Filter Operator [FIL_26] (rows=9 width=93) - predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] + Merge Join Operator [MERGEJOIN_53] (rows=9 width=93) + Conds:RS_16._col0=RS_17._col0(Left Semi),Output:["_col0","_col1","_col2"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] PartitionCols:_col0 - Group By Operator [GBY_12] (rows=7 width=85) + Group By Operator [GBY_15] (rows=7 width=85) Output:["_col0"],keys:_col0 Select Operator [SEL_8] (rows=18 width=80) Output:["_col0"] - Filter Operator [FIL_27] (rows=18 width=80) + Filter Operator [FIL_28] (rows=18 width=80) predicate:key is not null TableScan [TS_6] (rows=20 width=80) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_16] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_52] (rows=9 width=93) + Conds:RS_11._col0=RS_12._col0(Left Semi),Output:["_col0","_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=9 width=93) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_26] (rows=9 width=93) + predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col0 + Group By Operator [GBY_10] (rows=4 width=85) + Output:["_col0"],keys:_col0 + Select Operator [SEL_5] (rows=9 width=85) + Output:["_col0"] + Filter Operator [FIL_27] (rows=9 width=93) + predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0)) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select a, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by a+b desc, c asc) cbo_t1 left semi join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by q+r/10 desc, p) cbo_t2 on cbo_t1.a=p left semi join cbo_t3 on cbo_t1.a=key where (b + 1 >= 0) and (b > 0 or a >= 0) group by a, c having a > 0 and (a >=1 or c >= 1) and (a + c) >= 0 order by c, a PREHOOK: type: QUERY @@ -1805,81 +1871,87 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 5 llap - File Output Operator [FS_34] - Select Operator [SEL_33] (rows=1 width=101) + Reducer 6 llap + File Output Operator [FS_36] + Select Operator [SEL_35] (rows=1 width=101) Output:["_col0","_col1","_col2"] - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_32] - Select Operator [SEL_31] (rows=1 width=101) + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_34] + Select Operator [SEL_33] (rows=1 width=101) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_30] (rows=1 width=101) + Group By Operator [GBY_32] (rows=1 width=101) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_29] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_31] PartitionCols:_col0, _col1 - Group By Operator [GBY_28] (rows=1 width=101) + Group By Operator [GBY_30] (rows=1 width=101) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 - Merge Join Operator [MERGEJOIN_62] (rows=1 width=93) - Conds:RS_23._col0=RS_24._col0(Left Semi),RS_23._col0=RS_25._col0(Left Semi),Output:["_col0","_col1"] - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + Merge Join Operator [MERGEJOIN_69] (rows=1 width=93) + Conds:RS_26._col0=RS_27._col0(Left Semi),Output:["_col0","_col1"] + <-Map 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_27] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=3 width=85) + Group By Operator [GBY_25] (rows=3 width=85) Output:["_col0"],keys:_col0 Select Operator [SEL_18] (rows=6 width=85) Output:["_col0"] - Filter Operator [FIL_43] (rows=6 width=85) + Filter Operator [FIL_44] (rows=6 width=85) predicate:(UDFToDouble(key) > 0.0D) TableScan [TS_16] (rows=20 width=80) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=1 width=93) - Output:["_col0","_col1"] - Filter Operator [FIL_7] (rows=1 width=101) - predicate:(((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0D) and ((UDFToDouble(_col2) >= 1.0D) or (_col3 >= 1L))) - Select Operator [SEL_6] (rows=1 width=101) - Output:["_col2","_col3"] - Group By Operator [GBY_5] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_41] (rows=1 width=93) - predicate:((((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0D)) and ((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (UDFToDouble(key) > 0.0D) and (c_float > 0.0)) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_24] + <-Reducer 3 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_26] PartitionCols:_col0 - Group By Operator [GBY_20] (rows=1 width=85) - Output:["_col0"],keys:_col0 - Select Operator [SEL_15] (rows=1 width=85) - Output:["_col0"] - Group By Operator [GBY_14] (rows=1 width=93) - Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_12] (rows=1 width=93) - Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_42] (rows=1 width=93) - predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (UDFToDouble(key) > 0.0D) and (c_float > 0.0)) - TableScan [TS_9] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Merge Join Operator [MERGEJOIN_68] (rows=1 width=93) + Conds:RS_21._col0=RS_22._col0(Left Semi),Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=1 width=93) + Output:["_col0","_col1"] + Filter Operator [FIL_7] (rows=1 width=101) + predicate:(((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0D) and ((UDFToDouble(_col2) >= 1.0D) or (_col3 >= 1L))) + Select Operator [SEL_6] (rows=1 width=101) + Output:["_col2","_col3"] + Group By Operator [GBY_5] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_3] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_42] (rows=1 width=93) + predicate:((((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0D)) and ((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (UDFToDouble(key) > 0.0D) and (c_float > 0.0)) + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col0 + Group By Operator [GBY_20] (rows=1 width=85) + Output:["_col0"],keys:_col0 + Select Operator [SEL_15] (rows=1 width=85) + Output:["_col0"] + Group By Operator [GBY_14] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_12] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:key, c_int, c_float + Filter Operator [FIL_43] (rows=1 width=93) + predicate:(((UDFToFloat(c_int) + c_float) >= 0) and ((c_int + 1) >= 0) and ((c_int > 0) or c_float is not null) and ((c_int >= 1) or (c_float >= 1.0)) and (UDFToDouble(key) > 0.0D) and (c_float > 0.0)) + TableScan [TS_9] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1 PREHOOK: type: QUERY @@ -3917,39 +3989,42 @@ POSTHOOK: Input: default@myinput1_n7 Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_11] - Select Operator [SEL_10] (rows=2 width=8) + Reducer 3 llap + File Output Operator [FS_12] + Select Operator [SEL_11] (rows=1 width=8) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_39] (rows=2 width=8) - Conds:RS_4.key=RS_6.value(Inner),RS_4.key=RS_8.key(Inner),Output:["_col0","_col1","_col5","_col6","_col10","_col11"] + Merge Join Operator [MERGEJOIN_41] (rows=1 width=8) + Conds:RS_7._col0=RS_9.key(Inner),Output:["_col0","_col1","_col5","_col6","_col10","_col11"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] + SHUFFLE [RS_9] PartitionCols:key - Filter Operator [FIL_18] (rows=1 width=8) + Filter Operator [FIL_16] (rows=1 width=8) predicate:key is not null TableScan [TS_0] (rows=1 width=8) default@myinput1_n7,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - PartitionCols:value - Filter Operator [FIL_19] (rows=1 width=8) - predicate:value is not null - TableScan [TS_1] (rows=1 width=8) - default@myinput1_n7,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:key - Filter Operator [FIL_20] (rows=1 width=8) - predicate:key is not null - TableScan [TS_2] (rows=1 width=8) - default@myinput1_n7,c,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_7] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_40] (rows=1 width=8) + Conds:RS_3.key=RS_4.value(Inner),Output:["_col0","_col1","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_3] + PartitionCols:key + Please refer to the previous Filter Operator [FIL_16] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:value + Filter Operator [FIL_17] (rows=1 width=8) + predicate:value is not null + TableScan [TS_1] (rows=1 width=8) + default@myinput1_n7,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain select * from myinput1_n7 a join myinput1_n7 b on a.key<=>b.value join myinput1_n7 c on a.key<=>c.key PREHOOK: type: QUERY @@ -3962,33 +4037,38 @@ POSTHOOK: Input: default@myinput1_n7 Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=2 width=8) + Reducer 3 llap + File Output Operator [FS_10] + Select Operator [SEL_9] (rows=1 width=8) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_9] (rows=2 width=8) - Conds:RS_3.key=RS_4.value(Inner),RS_3.key=RS_5.key(Inner),Output:["_col0","_col1","_col5","_col6","_col10","_col11"] + Merge Join Operator [MERGEJOIN_12] (rows=1 width=8) + Conds:RS_6._col0=RS_7.key(Inner),Output:["_col0","_col1","_col5","_col6","_col10","_col11"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_3] + SHUFFLE [RS_7] PartitionCols:key TableScan [TS_0] (rows=1 width=8) default@myinput1_n7,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:value - TableScan [TS_1] (rows=1 width=8) - default@myinput1_n7,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_5] - PartitionCols:key - TableScan [TS_2] (rows=1 width=8) - default@myinput1_n7,c,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_6] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_11] (rows=1 width=8) + Conds:RS_3.key=RS_4.value(Inner),Output:["_col0","_col1","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_3] + PartitionCols:key + Please refer to the previous TableScan [TS_0] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:value + TableScan [TS_1] (rows=1 width=8) + default@myinput1_n7,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain select * from myinput1_n7 a join myinput1_n7 b on a.key<=>b.value AND a.value=b.key join myinput1_n7 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY @@ -4001,39 +4081,42 @@ POSTHOOK: Input: default@myinput1_n7 Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_11] - Select Operator [SEL_10] (rows=2 width=8) + Reducer 3 llap + File Output Operator [FS_14] + Select Operator [SEL_13] (rows=1 width=8) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_15] (rows=2 width=8) - Conds:RS_4.key, value=RS_6.value, key(Inner),RS_4.key, value=RS_8.key, value(Inner),Output:["_col0","_col1","_col5","_col6","_col10","_col11"] + Merge Join Operator [MERGEJOIN_19] (rows=1 width=8) + Conds:RS_9._col0, _col1=RS_11.key, value(Inner),Output:["_col0","_col1","_col5","_col6","_col10","_col11"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] + SHUFFLE [RS_11] PartitionCols:key, value - Filter Operator [FIL_12] (rows=1 width=8) + Filter Operator [FIL_15] (rows=1 width=8) predicate:value is not null TableScan [TS_0] (rows=1 width=8) default@myinput1_n7,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - PartitionCols:value, key - Filter Operator [FIL_13] (rows=1 width=8) - predicate:key is not null - TableScan [TS_1] (rows=1 width=8) - default@myinput1_n7,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:key, value - Filter Operator [FIL_14] (rows=1 width=8) - predicate:value is not null - TableScan [TS_2] (rows=1 width=8) - default@myinput1_n7,c,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_9] + PartitionCols:_col0, _col1 + Merge Join Operator [MERGEJOIN_18] (rows=1 width=8) + Conds:RS_4.key, value=RS_6.value, key(Inner),Output:["_col0","_col1","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:key, value + Please refer to the previous Filter Operator [FIL_15] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_6] + PartitionCols:value, key + Filter Operator [FIL_16] (rows=1 width=8) + predicate:key is not null + TableScan [TS_1] (rows=1 width=8) + default@myinput1_n7,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain select * from myinput1_n7 a join myinput1_n7 b on a.key<=>b.value AND a.value<=>b.key join myinput1_n7 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY @@ -4046,33 +4129,38 @@ POSTHOOK: Input: default@myinput1_n7 Plan not optimized by CBO due to missing feature [Less_than_equal_greater_than]. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 llap - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=2 width=8) + Reducer 3 llap + File Output Operator [FS_10] + Select Operator [SEL_9] (rows=1 width=8) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_9] (rows=2 width=8) - Conds:RS_3.key, value=RS_4.value, key(Inner),RS_3.key, value=RS_5.key, value(Inner),Output:["_col0","_col1","_col5","_col6","_col10","_col11"] + Merge Join Operator [MERGEJOIN_12] (rows=1 width=8) + Conds:RS_6._col0, _col1=RS_7.key, value(Inner),Output:["_col0","_col1","_col5","_col6","_col10","_col11"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_3] + SHUFFLE [RS_7] PartitionCols:key, value TableScan [TS_0] (rows=1 width=8) default@myinput1_n7,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:value, key - TableScan [TS_1] (rows=1 width=8) - default@myinput1_n7,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_5] - PartitionCols:key, value - TableScan [TS_2] (rows=1 width=8) - default@myinput1_n7,c,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 2 [ONE_TO_ONE_EDGE] llap + FORWARD [RS_6] + PartitionCols:_col0, _col1 + Merge Join Operator [MERGEJOIN_11] (rows=1 width=8) + Conds:RS_3.key, value=RS_4.value, key(Inner),Output:["_col0","_col1","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_3] + PartitionCols:key, value + Please refer to the previous TableScan [TS_0] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:value, key + TableScan [TS_1] (rows=1 width=8) + default@myinput1_n7,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain select * FROM myinput1_n7 a LEFT OUTER JOIN myinput1_n7 b ON a.key<=>b.value PREHOOK: type: QUERY @@ -6017,51 +6105,54 @@ POSTHOOK: Input: default@t4_n15 Plan optimized by CBO. Vertex dependency in root stage -Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) +Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +Map 3 <- Map 4 (BROADCAST_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Map 3 llap - File Output Operator [FS_21] - Map Join Operator [MAPJOIN_71] (rows=2 width=404) - Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_70._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 1 [BROADCAST_EDGE] llap - BROADCAST [RS_16] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=1 width=368) - Output:["_col0","_col1"] - Filter Operator [FIL_32] (rows=1 width=368) - predicate:key is not null - TableScan [TS_0] (rows=1 width=368) - default@t1_n119,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map 2 [BROADCAST_EDGE] llap - BROADCAST [RS_17] + Map 1 llap + File Output Operator [FS_23] + Map Join Operator [MAPJOIN_78] (rows=1 width=444) + Conds:MAPJOIN_76._col2=RS_20._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_20] PartitionCols:_col0 - Select Operator [SEL_5] (rows=1 width=368) - Output:["_col0","_col1"] - Filter Operator [FIL_33] (rows=1 width=368) - predicate:key is not null - TableScan [TS_3] (rows=1 width=368) - default@t2_n70,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map Join Operator [MAPJOIN_70] (rows=1 width=404) - Conds:SEL_8._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Map 4 [BROADCAST_EDGE] llap - BROADCAST [RS_13] + Map Join Operator [MAPJOIN_77] (rows=1 width=404) + Conds:SEL_8._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 4 [BROADCAST_EDGE] llap + BROADCAST [RS_13] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=1 width=368) + Output:["_col0","_col1"] + Filter Operator [FIL_35] (rows=1 width=368) + predicate:key is not null + TableScan [TS_9] (rows=1 width=368) + default@t4_n15,d,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + <-Select Operator [SEL_8] (rows=1 width=368) + Output:["_col0","_col1"] + Filter Operator [FIL_34] (rows=1 width=368) + predicate:key is not null + TableScan [TS_6] (rows=1 width=368) + default@t3_n26,c,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + <-Map Join Operator [MAPJOIN_76] (rows=1 width=404) + Conds:SEL_2._col0=RS_17._col0(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 2 [BROADCAST_EDGE] llap + BROADCAST [RS_17] PartitionCols:_col0 - Select Operator [SEL_11] (rows=1 width=368) + Select Operator [SEL_5] (rows=1 width=368) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=1 width=368) + Filter Operator [FIL_33] (rows=1 width=368) predicate:key is not null - TableScan [TS_9] (rows=1 width=368) - default@t4_n15,d,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Select Operator [SEL_8] (rows=1 width=368) + TableScan [TS_3] (rows=1 width=368) + default@t2_n70,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + <-Select Operator [SEL_2] (rows=1 width=368) Output:["_col0","_col1"] - Filter Operator [FIL_34] (rows=1 width=368) + Filter Operator [FIL_32] (rows=1 width=368) predicate:key is not null - TableScan [TS_6] (rows=1 width=368) - default@t3_n26,c,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + TableScan [TS_0] (rows=1 width=368) + default@t1_n119,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] PREHOOK: query: explain select /*+ STREAMTABLE(a,c) */ * @@ -6088,51 +6179,54 @@ POSTHOOK: Input: default@t4_n15 Plan optimized by CBO. Vertex dependency in root stage -Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) +Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +Map 3 <- Map 4 (BROADCAST_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Map 3 llap - File Output Operator [FS_21] - Map Join Operator [MAPJOIN_71] (rows=2 width=404) - Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_70._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 1 [BROADCAST_EDGE] llap - BROADCAST [RS_16] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=1 width=368) - Output:["_col0","_col1"] - Filter Operator [FIL_32] (rows=1 width=368) - predicate:key is not null - TableScan [TS_0] (rows=1 width=368) - default@t1_n119,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map 2 [BROADCAST_EDGE] llap - BROADCAST [RS_17] + Map 1 llap + File Output Operator [FS_23] + Map Join Operator [MAPJOIN_78] (rows=1 width=444) + Conds:MAPJOIN_76._col2=RS_20._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_20] PartitionCols:_col0 - Select Operator [SEL_5] (rows=1 width=368) - Output:["_col0","_col1"] - Filter Operator [FIL_33] (rows=1 width=368) - predicate:key is not null - TableScan [TS_3] (rows=1 width=368) - default@t2_n70,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map Join Operator [MAPJOIN_70] (rows=1 width=404) - Conds:SEL_8._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Map 4 [BROADCAST_EDGE] llap - BROADCAST [RS_13] + Map Join Operator [MAPJOIN_77] (rows=1 width=404) + Conds:SEL_8._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 4 [BROADCAST_EDGE] llap + BROADCAST [RS_13] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=1 width=368) + Output:["_col0","_col1"] + Filter Operator [FIL_35] (rows=1 width=368) + predicate:key is not null + TableScan [TS_9] (rows=1 width=368) + default@t4_n15,d,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + <-Select Operator [SEL_8] (rows=1 width=368) + Output:["_col0","_col1"] + Filter Operator [FIL_34] (rows=1 width=368) + predicate:key is not null + TableScan [TS_6] (rows=1 width=368) + default@t3_n26,c,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + <-Map Join Operator [MAPJOIN_76] (rows=1 width=404) + Conds:SEL_2._col0=RS_17._col0(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 2 [BROADCAST_EDGE] llap + BROADCAST [RS_17] PartitionCols:_col0 - Select Operator [SEL_11] (rows=1 width=368) + Select Operator [SEL_5] (rows=1 width=368) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=1 width=368) + Filter Operator [FIL_33] (rows=1 width=368) predicate:key is not null - TableScan [TS_9] (rows=1 width=368) - default@t4_n15,d,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Select Operator [SEL_8] (rows=1 width=368) + TableScan [TS_3] (rows=1 width=368) + default@t2_n70,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + <-Select Operator [SEL_2] (rows=1 width=368) Output:["_col0","_col1"] - Filter Operator [FIL_34] (rows=1 width=368) + Filter Operator [FIL_32] (rows=1 width=368) predicate:key is not null - TableScan [TS_6] (rows=1 width=368) - default@t3_n26,c,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + TableScan [TS_0] (rows=1 width=368) + default@t1_n119,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] PREHOOK: query: explain FROM T1_n119 a JOIN src c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_2.q.out index f671821e70..de3f2e2332 100644 --- a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_2.q.out @@ -76,31 +76,38 @@ STAGE PLANS: TableScan alias: z filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 @@ -126,10 +133,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -226,31 +233,38 @@ STAGE PLANS: TableScan alias: z filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 @@ -276,10 +290,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -362,8 +376,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Map 3 <- Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -387,50 +402,64 @@ STAGE PLANS: TableScan alias: z filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - 3 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 4 - 3 Map 5 - Statistics: Num rows: 1584 Data size: 12672 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: w filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 2 + Statistics: Num rows: 1000 Data size: 86000 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 5 + Statistics: Num rows: 1582 Data size: 12656 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -449,17 +478,17 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -497,7 +526,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -5680 +0 PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM src1 x JOIN srcpart z ON (x.key = z.key) @@ -535,8 +564,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Map 3 <- Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -560,50 +590,64 @@ STAGE PLANS: TableScan alias: z filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - 3 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 4 - 3 Map 5 - Statistics: Num rows: 1584 Data size: 12672 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: w filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 2 + Statistics: Num rows: 1000 Data size: 86000 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 5 + Statistics: Num rows: 1582 Data size: 12656 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -622,17 +666,17 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -670,7 +714,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -5680 +0 PREHOOK: query: SELECT 1 PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -767,31 +811,38 @@ STAGE PLANS: TableScan alias: z filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 6 - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 6 + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 6 @@ -831,31 +882,38 @@ STAGE PLANS: TableScan alias: z filterExpr: value is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 value (type: string) 1 value (type: string) - 2 value (type: string) + outputColumnNames: _col1 input vertices: 0 Map 7 - 2 Map 10 - Statistics: Num rows: 265 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 162 Data size: 14418 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + input vertices: + 1 Map 10 + Statistics: Num rows: 263 Data size: 2104 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 3 @@ -865,17 +923,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -883,10 +941,10 @@ STAGE PLANS: keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -898,17 +956,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Union 4 Vertex: Union 4 @@ -1039,31 +1097,38 @@ STAGE PLANS: TableScan alias: z filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 6 - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 6 + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 6 @@ -1103,31 +1168,38 @@ STAGE PLANS: TableScan alias: z filterExpr: value is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 value (type: string) 1 value (type: string) - 2 value (type: string) + outputColumnNames: _col1 input vertices: 0 Map 7 - 2 Map 10 - Statistics: Num rows: 265 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 162 Data size: 14418 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + input vertices: + 1 Map 10 + Statistics: Num rows: 263 Data size: 2104 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 3 @@ -1137,17 +1209,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1155,10 +1227,10 @@ STAGE PLANS: keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1170,17 +1242,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator keys: _col0 (type: bigint) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Union 4 Vertex: Union 4 @@ -1304,60 +1376,74 @@ STAGE PLANS: TableScan alias: z1 filterExpr: (key < 'zzzzzzzz') (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key < 'zzzzzzzz') (type: boolean) - Statistics: Num rows: 666 Data size: 57942 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 666 Data size: 57942 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0, _col1 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 19 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 700 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 _col1 (type: string) - 1 value (type: string) - 2 value (type: string) + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col1 input vertices: - 1 Map 5 - 2 Map 6 - Statistics: Num rows: 196 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + 1 Map 4 + Statistics: Num rows: 4 Data size: 356 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + input vertices: + 1 Map 6 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: y1 - filterExpr: ((value < 'zzzzzzzz') and (key < 'zzzzzzzz')) (type: boolean) + filterExpr: (key is not null and (value < 'zzzzzzzz')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 'zzzzzzzz') and (value < 'zzzzzzzz')) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value < 'zzzzzzzz') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -1365,15 +1451,15 @@ STAGE PLANS: TableScan alias: z2 filterExpr: ((key < 'zzzzzzzzzz') and (value < 'zzzzzzzzzz')) (type: boolean) - Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: ((key < 'zzzzzzzzzz') and (value < 'zzzzzzzzzz')) (type: boolean) - Statistics: Num rows: 222 Data size: 39516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 222 Data size: 39516 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) - Statistics: Num rows: 222 Data size: 39516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 222 Data size: 39516 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: no inputs Map 6 @@ -1399,10 +1485,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1520,60 +1606,74 @@ STAGE PLANS: TableScan alias: z1 filterExpr: (key < 'zzzzzzzz') (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key < 'zzzzzzzz') (type: boolean) - Statistics: Num rows: 666 Data size: 57942 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 666 Data size: 57942 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0, _col1 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 19 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 700 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 _col1 (type: string) - 1 value (type: string) - 2 value (type: string) + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col1 input vertices: - 1 Map 5 - 2 Map 6 - Statistics: Num rows: 196 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + 1 Map 4 + Statistics: Num rows: 4 Data size: 356 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + input vertices: + 1 Map 6 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: y1 - filterExpr: ((value < 'zzzzzzzz') and (key < 'zzzzzzzz')) (type: boolean) + filterExpr: (key is not null and (value < 'zzzzzzzz')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 'zzzzzzzz') and (value < 'zzzzzzzz')) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value < 'zzzzzzzz') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -1581,15 +1681,15 @@ STAGE PLANS: TableScan alias: z2 filterExpr: ((key < 'zzzzzzzzzz') and (value < 'zzzzzzzzzz')) (type: boolean) - Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: ((key < 'zzzzzzzzzz') and (value < 'zzzzzzzzzz')) (type: boolean) - Statistics: Num rows: 222 Data size: 39516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 222 Data size: 39516 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) - Statistics: Num rows: 222 Data size: 39516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 222 Data size: 39516 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: no inputs Map 6 @@ -1615,10 +1715,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index 3bd60fdfe1..2113e80094 100644 --- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -602,7 +602,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) Map 4 <- Map 3 (BROADCAST_EDGE) Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### @@ -622,15 +623,27 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: true + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 2 => 25 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 2 + Position of Big Table: 0 + Statistics: Num rows: 39 Data size: 10296 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 39 Data size: 10296 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col1 (type: string), _col3 (type: string) + auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: @@ -783,17 +796,14 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Estimated key counts: Map 1 => 25, Map 2 => 25 + Estimated key counts: Map 1 => 39 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col5 input vertices: 0 Map 1 - 1 Map 2 - Position of Big Table: 2 + Position of Big Table: 1 Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) diff --git a/ql/src/test/results/clientpositive/llap/join_is_not_distinct_from.q.out b/ql/src/test/results/clientpositive/llap/join_is_not_distinct_from.q.out index 98d806dcf4..99a850d04f 100644 --- a/ql/src/test/results/clientpositive/llap/join_is_not_distinct_from.q.out +++ b/ql/src/test/results/clientpositive/llap/join_is_not_distinct_from.q.out @@ -126,7 +126,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -144,9 +145,15 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -163,43 +170,42 @@ STAGE PLANS: value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int) 1 value (type: int) - 2 key (type: int) + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -238,7 +244,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -252,32 +259,25 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: int) + key expressions: key (type: int) sort order: + - Map-reduce partition columns: value (type: int) + Map-reduce partition columns: key (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int) + key expressions: value (type: int) sort order: + - Map-reduce partition columns: key (type: int) + Map-reduce partition columns: value (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -286,21 +286,37 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int) 1 value (type: int) - 2 key (type: int) + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -366,7 +382,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -383,9 +400,14 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), value (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), value (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -401,43 +423,43 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) + nullSafes: [true, false] + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 key (type: int), value (type: int) nullSafes: [true, false] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -476,7 +498,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -489,29 +512,22 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: int), key (type: int) + key expressions: key (type: int), value (type: int) sort order: ++ - Map-reduce partition columns: value (type: int), key (type: int) + Map-reduce partition columns: key (type: int), value (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int), value (type: int) + key expressions: value (type: int), key (type: int) sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) + Map-reduce partition columns: value (type: int), key (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -521,21 +537,37 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) + nullSafes: [true, true] + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 key (type: int), value (type: int) nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/join_nullsafe.q.out b/ql/src/test/results/clientpositive/llap/join_nullsafe.q.out index f659718360..5ec8bf1b57 100644 --- a/ql/src/test/results/clientpositive/llap/join_nullsafe.q.out +++ b/ql/src/test/results/clientpositive/llap/join_nullsafe.q.out @@ -126,7 +126,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -144,9 +145,15 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -163,43 +170,42 @@ STAGE PLANS: value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int) 1 value (type: int) - 2 key (type: int) + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -238,7 +244,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -252,32 +259,25 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: int) + key expressions: key (type: int) sort order: + - Map-reduce partition columns: value (type: int) + Map-reduce partition columns: key (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int) + key expressions: value (type: int) sort order: + - Map-reduce partition columns: key (type: int) + Map-reduce partition columns: value (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -286,21 +286,37 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int) 1 value (type: int) - 2 key (type: int) + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -366,7 +382,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -383,9 +400,14 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), value (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), value (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -401,43 +423,43 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) + nullSafes: [true, false] + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 key (type: int), value (type: int) nullSafes: [true, false] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -476,7 +498,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -489,29 +512,22 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: int), key (type: int) + key expressions: key (type: int), value (type: int) sort order: ++ - Map-reduce partition columns: value (type: int), key (type: int) + Map-reduce partition columns: key (type: int), value (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int), value (type: int) + key expressions: value (type: int), key (type: int) sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) + Map-reduce partition columns: value (type: int), key (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -521,21 +537,37 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) + nullSafes: [true, true] + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col6 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 key (type: int), value (type: int) nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/keep_uniform.q.out b/ql/src/test/results/clientpositive/llap/keep_uniform.q.out index 27a48f4807..44b1ba48e7 100644 --- a/ql/src/test/results/clientpositive/llap/keep_uniform.q.out +++ b/ql/src/test/results/clientpositive/llap/keep_uniform.q.out @@ -437,17 +437,18 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (SIMPLE_EDGE) - Reducer 14 <- Map 17 (SIMPLE_EDGE), Reducer 12 (ONE_TO_ONE_EDGE) - Reducer 15 <- Reducer 14 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) - Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 13 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Map 18 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) + Reducer 14 <- Reducer 13 (SIMPLE_EDGE) + Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 13 (ONE_TO_ONE_EDGE) + Reducer 16 <- Reducer 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) + Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 14 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 16 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 19 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -504,6 +505,57 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Map 10 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ca_address_sk:int, 1:ca_address_id:string, 2:ca_street_number:string, 3:ca_street_name:string, 4:ca_street_type:string, 5:ca_suite_number:string, 6:ca_city:string, 7:ca_county:string, 8:ca_state:string, 9:ca_zip:string, 10:ca_country:string, 11:ca_gmt_offset:decimal(5,2)/DECIMAL_64, 12:ca_location_type:string, 13:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 8:string, val TX), SelectColumnIsNotNull(col 0:int)) + predicate: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 13 + includeColumns: [0, 8] + dataColumns: ca_address_sk:int, ca_address_id:string, ca_street_number:string, ca_street_name:string, ca_street_type:string, ca_suite_number:string, ca_city:string, ca_county:string, ca_state:string, ca_zip:string, ca_country:string, ca_gmt_offset:decimal(5,2)/DECIMAL_64, ca_location_type:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 11 Map Operator Tree: TableScan alias: web_site @@ -554,7 +606,7 @@ STAGE PLANS: dataColumns: web_site_sk:int, web_site_id:string, web_rec_start_date:string, web_rec_end_date:string, web_name:string, web_open_date_sk:int, web_close_date_sk:int, web_class:string, web_manager:string, web_mkt_id:int, web_mkt_class:string, web_mkt_desc:string, web_market_manager:string, web_company_id:int, web_company_name:string, web_street_number:string, web_street_name:string, web_street_type:string, web_suite_number:string, web_city:string, web_county:string, web_state:string, web_zip:string, web_country:string, web_gmt_offset:decimal(5,2)/DECIMAL_64, web_tax_percentage:decimal(5,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 11 + Map 12 Map Operator Tree: TableScan alias: ws1 @@ -607,7 +659,7 @@ STAGE PLANS: dataColumns: ws_sold_date_sk:int, ws_sold_time_sk:int, ws_ship_date_sk:int, ws_item_sk:int, ws_bill_customer_sk:int, ws_bill_cdemo_sk:int, ws_bill_hdemo_sk:int, ws_bill_addr_sk:int, ws_ship_customer_sk:int, ws_ship_cdemo_sk:int, ws_ship_hdemo_sk:int, ws_ship_addr_sk:int, ws_web_page_sk:int, ws_web_site_sk:int, ws_ship_mode_sk:int, ws_warehouse_sk:int, ws_promo_sk:int, ws_order_number:int, ws_quantity:int, ws_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_list_price:decimal(7,2)/DECIMAL_64, ws_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, ws_ext_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_ext_list_price:decimal(7,2)/DECIMAL_64, ws_ext_tax:decimal(7,2)/DECIMAL_64, ws_coupon_amt:decimal(7,2)/DECIMAL_64, ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, ws_net_paid:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, ws_net_profit:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 16 + Map 17 Map Operator Tree: TableScan alias: ws2 @@ -661,7 +713,7 @@ STAGE PLANS: dataColumns: ws_sold_date_sk:int, ws_sold_time_sk:int, ws_ship_date_sk:int, ws_item_sk:int, ws_bill_customer_sk:int, ws_bill_cdemo_sk:int, ws_bill_hdemo_sk:int, ws_bill_addr_sk:int, ws_ship_customer_sk:int, ws_ship_cdemo_sk:int, ws_ship_hdemo_sk:int, ws_ship_addr_sk:int, ws_web_page_sk:int, ws_web_site_sk:int, ws_ship_mode_sk:int, ws_warehouse_sk:int, ws_promo_sk:int, ws_order_number:int, ws_quantity:int, ws_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_list_price:decimal(7,2)/DECIMAL_64, ws_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, ws_ext_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_ext_list_price:decimal(7,2)/DECIMAL_64, ws_ext_tax:decimal(7,2)/DECIMAL_64, ws_coupon_amt:decimal(7,2)/DECIMAL_64, ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, ws_net_paid:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, ws_net_profit:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 17 + Map 18 Map Operator Tree: TableScan alias: web_returns @@ -713,7 +765,7 @@ STAGE PLANS: dataColumns: wr_returned_date_sk:int, wr_returned_time_sk:int, wr_item_sk:int, wr_refunded_customer_sk:int, wr_refunded_cdemo_sk:int, wr_refunded_hdemo_sk:int, wr_refunded_addr_sk:int, wr_returning_customer_sk:int, wr_returning_cdemo_sk:int, wr_returning_hdemo_sk:int, wr_returning_addr_sk:int, wr_web_page_sk:int, wr_reason_sk:int, wr_order_number:int, wr_return_quantity:int, wr_return_amt:decimal(7,2)/DECIMAL_64, wr_return_tax:decimal(7,2)/DECIMAL_64, wr_return_amt_inc_tax:decimal(7,2)/DECIMAL_64, wr_fee:decimal(7,2)/DECIMAL_64, wr_return_ship_cost:decimal(7,2)/DECIMAL_64, wr_refunded_cash:decimal(7,2)/DECIMAL_64, wr_reversed_charge:decimal(7,2)/DECIMAL_64, wr_account_credit:decimal(7,2)/DECIMAL_64, wr_net_loss:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 18 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -764,58 +816,7 @@ STAGE PLANS: dataColumns: d_date_sk:int, d_date_id:string, d_date:string, d_month_seq:int, d_week_seq:int, d_quarter_seq:int, d_year:int, d_dow:int, d_moy:int, d_dom:int, d_qoy:int, d_fy_year:int, d_fy_quarter_seq:int, d_fy_week_seq:int, d_day_name:string, d_quarter_name:string, d_holiday:string, d_weekend:string, d_following_holiday:string, d_first_dom:int, d_last_dom:int, d_same_day_ly:int, d_same_day_lq:int, d_current_day:string, d_current_week:string, d_current_month:string, d_current_quarter:string, d_current_year:string partitionColumnCount: 0 scratchColumnTypeNames: [timestamp] - Map 9 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:ca_address_sk:int, 1:ca_address_id:string, 2:ca_street_number:string, 3:ca_street_name:string, 4:ca_street_type:string, 5:ca_suite_number:string, 6:ca_city:string, 7:ca_county:string, 8:ca_state:string, 9:ca_zip:string, 10:ca_country:string, 11:ca_gmt_offset:decimal(5,2)/DECIMAL_64, 12:ca_location_type:string, 13:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 8:string, val TX), SelectColumnIsNotNull(col 0:int)) - predicate: ((ca_state = 'TX') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 13 - includeColumns: [0, 8] - dataColumns: ca_address_sk:int, ca_address_id:string, ca_street_number:string, ca_street_name:string, ca_street_type:string, ca_suite_number:string, ca_city:string, ca_county:string, ca_state:string, ca_zip:string, ca_country:string, ca_gmt_offset:decimal(5,2)/DECIMAL_64, ca_location_type:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 12 + Reducer 13 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -853,7 +854,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 13 + Reducer 14 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -892,7 +893,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumns: 0:int Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 14 + Reducer 15 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -916,7 +917,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 15 + Reducer 16 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1001,23 +1002,41 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col3 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 319 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1027,23 +1046,23 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4), sum(_col5) keys: _col3 (type: int) mode: hash outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1072,7 +1091,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 701 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), sum(_col1), sum(_col2) Group By Vectorization: @@ -1094,7 +1113,7 @@ STAGE PLANS: valueColumns: 0:bigint, 1:decimal(17,2), 2:decimal(17,2) Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1142,7 +1161,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 8 + Reducer 9 Execution mode: vectorized, llap Reduce Vectorization: enabled: true diff --git a/ql/src/test/results/clientpositive/llap/lineage2.q.out b/ql/src/test/results/clientpositive/llap/lineage2.q.out index 8f266f8f25..9543864b2e 100644 --- a/ql/src/test/results/clientpositive/llap/lineage2.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage2.q.out @@ -593,7 +593,7 @@ PREHOOK: Input: default@dept_n10 PREHOOK: Input: default@emp PREHOOK: Input: default@project_n10 PREHOOK: Output: default@tgt_n10 -{"version":"1.0","engine":"tez","database":"default","hash":"bd297ef302d63c60b0bfb692af732b04","queryText":"INSERT INTO TABLE tgt_n10\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept_n10 d ON d.dept_id = em.dept_id\n ) emd JOIN project_n10 p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id AND e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept_n10.dept_name, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 'hll')","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project_n10.project_id, 'hll')","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project_n10.project_name, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt_n10.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt_n10.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt_n10.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt_n10.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project_n10.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project_n10.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_id"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"bd297ef302d63c60b0bfb692af732b04","queryText":"INSERT INTO TABLE tgt_n10\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept_n10 d ON d.dept_id = em.dept_id\n ) emd JOIN project_n10 p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept_n10.dept_name, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 'hll')","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project_n10.project_id, 'hll')","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project_n10.project_name, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt_n10.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt_n10.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt_n10.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt_n10.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt_n10.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project_n10.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project_n10.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept_n10.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out index 46e211d26a..90e1bf7fa1 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out @@ -446,7 +446,8 @@ POSTHOOK: Lineage: part_null_n1.p_partkey SCRIPT [] POSTHOOK: Lineage: part_null_n1.p_retailprice SCRIPT [] POSTHOOK: Lineage: part_null_n1.p_size SCRIPT [] POSTHOOK: Lineage: part_null_n1.p_type SCRIPT [] -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select /*+ mapjoin(None)*/ * from part where p_name = (select p_name from part_null_n1 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -467,7 +468,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 4 <- Map 5 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -495,7 +497,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: part @@ -536,20 +538,31 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 + Statistics: Num rows: 1 Data size: 377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 377 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 876 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: int), null (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: double), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 876 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 876 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -561,7 +574,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Map 3' is a cross product +Warning: Map Join MAPJOIN[26][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 4' is a cross product PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n1 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -581,8 +595,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 1 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Map 4 <- Reducer 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -605,12 +619,25 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part_null_n1 + filterExpr: p_name is null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: part @@ -626,23 +653,20 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 input vertices: 0 Reducer 2 - 1 Map 1 - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 876 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: int), null (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: double), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 876 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 876 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -662,9 +686,18 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 377 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_2.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_2.q.out index f87b55b5c2..3a8b593390 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_2.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_2.q.out @@ -603,7 +603,8 @@ POSTHOOK: query: drop materialized view mv1_n0 POSTHOOK: type: DROP_MATERIALIZED_VIEW POSTHOOK: Input: default@mv1_n0 POSTHOOK: Output: default@mv1_n0 -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: create materialized view mv1_n0 as select depts_n0.name, dependents_n0.name as name2, emps_n0.deptno, depts_n0.deptno as deptno2, dependents_n0.empid from depts_n0, dependents_n0, emps_n0 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_6.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_6.q.out index b66cd57acb..ee5dfd1e1c 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_6.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_6.q.out @@ -581,7 +581,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -604,7 +605,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: locations @@ -624,7 +625,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: dependents @@ -651,11 +652,25 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: varchar(256)) 1 _col0 (type: varchar(256)) - 2 _col1 (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(256)) + Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: varchar(256)) + 1 _col1 (type: varchar(256)) outputColumnNames: _col2 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out index 03acafaca8..25c582536e 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out @@ -217,11 +217,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 10 <- Union 4 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 5 <- Union 4 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + Map 11 <- Union 5 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 6 <- Union 5 (SIMPLE_EDGE) + Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -246,6 +247,26 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 10 + Map Operator Tree: + TableScan + alias: emps_n8 + filterExpr: ((deptno > 10) and (deptno <= 11)) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((deptno <= 11) and (deptno > 10)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 11 Map Operator Tree: TableScan alias: default.mv1_n4 @@ -266,7 +287,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: locations_n4 @@ -286,7 +307,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 7 + Map 8 Map Operator Tree: TableScan alias: depts_n6 @@ -307,37 +328,32 @@ STAGE PLANS: value expressions: _col1 (type: varchar(256)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan - alias: emps_n8 - filterExpr: ((deptno > 10) and (deptno <= 11)) (type: boolean) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((deptno <= 11) and (deptno > 10)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptno (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: varchar(256)) 1 _col0 (type: varchar(256)) - 2 _col1 (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: varchar(256)) + 1 _col1 (type: varchar(256)) outputColumnNames: _col0, _col3 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -350,7 +366,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -368,7 +384,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -383,7 +399,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 9 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -400,8 +416,8 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: varchar(256)) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) - Union 4 - Vertex: Union 4 + Union 5 + Vertex: Union 5 Stage: Stage-0 Fetch Operator @@ -514,11 +530,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 5 <- Union 4 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 6 <- Union 5 (SIMPLE_EDGE) + Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -543,6 +560,27 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 10 + Map Operator Tree: + TableScan + alias: depts_n6 + filterExpr: ((deptno > 10) and (deptno < 20) and ((deptno <= 11) or (deptno >= 19)) and name is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((deptno <= 11) or (deptno >= 19)) and (deptno < 20) and (deptno > 10) and name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(256)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 11 Map Operator Tree: TableScan alias: default.mv1_n4 @@ -565,7 +603,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: locations_n4 @@ -585,7 +623,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 7 + Map 8 Map Operator Tree: TableScan alias: emps_n8 @@ -606,28 +644,7 @@ STAGE PLANS: value expressions: _col1 (type: float) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan - alias: depts_n6 - filterExpr: ((deptno > 10) and (deptno < 20) and ((deptno <= 11) or (deptno >= 19)) and name is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((deptno <= 11) or (deptno >= 19)) and (deptno < 20) and (deptno > 10) and name is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: deptno (type: int), name (type: varchar(256)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: varchar(256)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 11 + Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -658,11 +675,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: varchar(256)) 1 _col0 (type: varchar(256)) - 2 _col3 (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: varchar(256)) + 1 _col3 (type: varchar(256)) outputColumnNames: _col0, _col4 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -677,7 +709,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -698,7 +730,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -718,7 +750,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 9 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -735,8 +767,8 @@ STAGE PLANS: Map-reduce partition columns: _col3 (type: varchar(256)) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: float) - Union 4 - Vertex: Union 4 + Union 5 + Vertex: Union 5 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out index 9765fdb268..d0fe8ddb9e 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_part_2.q.out @@ -662,7 +662,8 @@ POSTHOOK: query: drop materialized view mv1_part_n0 POSTHOOK: type: DROP_MATERIALIZED_VIEW POSTHOOK: Input: default@mv1_part_n0 POSTHOOK: Output: default@mv1_part_n0 -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: create materialized view mv1_part_n0 partitioned on (deptno2) as select depts_n00.name, dependents_n00.name as name2, emps_n00.deptno, depts_n00.deptno as deptno2, dependents_n00.empid from depts_n00, dependents_n00, emps_n00 diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out index 7c1bc565af..072679d32e 100644 --- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -4037,7 +4037,8 @@ POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08 #### A masked pattern was here #### NULL NULL NULL 98 val_98 2008-04-08 NULL NULL NULL 98 val_98 2008-04-08 -Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from (select * from tab_n9 where tab_n9.key = 0)a full outer join diff --git a/ql/src/test/results/clientpositive/llap/mrr.q.out b/ql/src/test/results/clientpositive/llap/mrr.q.out index a8aceea293..bee1539d29 100644 --- a/ql/src/test/results/clientpositive/llap/mrr.q.out +++ b/ql/src/test/results/clientpositive/llap/mrr.q.out @@ -1312,8 +1312,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1337,23 +1339,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -1396,13 +1381,6 @@ STAGE PLANS: Filter Operator predicate: (_col1 > 1L) (type: boolean) Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1414,23 +1392,38 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5 + Statistics: Num rows: 83 Data size: 16434 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: bigint), _col4 (type: string), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col3, _col4, _col5 - Statistics: Num rows: 83 Data size: 16434 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: bigint), _col4 (type: string), _col5 (type: bigint) - outputColumnNames: _col0, _col1, _col3, _col4, _col5 + Statistics: Num rows: 83 Data size: 23655 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 83 Data size: 23655 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 83 Data size: 23655 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint) - Reducer 3 + value expressions: _col1 (type: bigint), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint) + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -1444,6 +1437,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/semijoin.q.out b/ql/src/test/results/clientpositive/llap/semijoin.q.out index e8f72538a1..a1e4788233 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin.q.out @@ -1321,8 +1321,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1342,7 +1343,7 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -1359,7 +1360,7 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: c @@ -1390,11 +1391,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col5, _col6 Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1406,7 +1422,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 17 Data size: 3162 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: string) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1598,8 +1614,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1618,7 +1635,7 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -1643,7 +1660,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: c @@ -1674,18 +1691,32 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Left Semi Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1753,8 +1784,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1769,7 +1801,7 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -1781,25 +1813,29 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1808,27 +1844,45 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 24 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1899,53 +1953,66 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1954,27 +2021,42 @@ STAGE PLANS: Merge Join Operator condition map: Right Outer Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 46 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2048,8 +2130,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2064,7 +2147,7 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -2076,25 +2159,29 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2103,27 +2190,45 @@ STAGE PLANS: Merge Join Operator condition map: Full Outer Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 57 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 24 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 + Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2197,44 +2302,53 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: c @@ -2252,27 +2366,41 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) - 2 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2346,44 +2474,53 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: c @@ -2401,27 +2538,41 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Right Outer Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) - 2 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 28 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2497,44 +2648,53 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: c @@ -2552,27 +2712,41 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Full Outer Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) - 2 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index 4221b63f4c..11cbb257cd 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -1723,10 +1723,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Map 6 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1762,7 +1763,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: v @@ -1782,7 +1783,7 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 87000 Basic stats: PARTIAL Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: k @@ -1808,12 +1809,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col1 + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 11100 Data size: 779119 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1823,7 +1838,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1838,7 +1853,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2771,10 +2786,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 4 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Map 5 <- Reducer 7 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2793,14 +2809,14 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: v - filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_8_i_cstring_bloom_filter)))) (type: boolean) + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_11_i_cstring_min) AND DynamicValue(RS_11_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_11_i_cstring_bloom_filter)))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: PARTIAL Column stats: PARTIAL Filter Operator - predicate: ((key1 BETWEEN DynamicValue(RS_8_i_cstring_min) AND DynamicValue(RS_8_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_8_i_cstring_bloom_filter))) and key1 is not null) (type: boolean) + predicate: ((key1 BETWEEN DynamicValue(RS_11_i_cstring_min) AND DynamicValue(RS_11_i_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_11_i_cstring_bloom_filter))) and key1 is not null) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator key expressions: key1 (type: string) @@ -2809,7 +2825,7 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 87000 Basic stats: PARTIAL Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: i @@ -2844,12 +2860,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 str (type: string) 1 key1 (type: string) - 2 cstring (type: string) - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col6 + Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: string) + 1 cstring (type: string) + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -2859,7 +2889,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2874,7 +2904,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out index 6a43d6b54b..1fca347c63 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out @@ -258,11 +258,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -286,7 +287,28 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col2 (type: double), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 6 + Map 10 + Map Operator Tree: + TableScan + alias: l + filterExpr: l_orderkey is not null (type: boolean) + Statistics: Num rows: 5999989709 Data size: 91199843728 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_orderkey is not null (type: boolean) + Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: bigint), l_quantity (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 7 Map Operator Tree: TableScan alias: customer @@ -307,7 +329,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 7 + Map 8 Map Operator Tree: TableScan alias: lineitem @@ -332,27 +354,6 @@ STAGE PLANS: value expressions: _col1 (type: double) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan - alias: l - filterExpr: l_orderkey is not null (type: boolean) - Statistics: Num rows: 5999989709 Data size: 91199843728 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: l_orderkey is not null (type: boolean) - Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: bigint), l_quantity (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -376,32 +377,47 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) - 2 _col0 (type: bigint) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1633500065 Data size: 322779612808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1633500065 Data size: 322779612808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double), _col3 (type: string), _col4 (type: bigint), _col5 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col8 - Statistics: Num rows: 12539978782 Data size: 190607677805 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE Top N Key Operator sort order: -++++ keys: _col2 (type: double), _col3 (type: string), _col0 (type: bigint), _col4 (type: bigint), _col5 (type: string) - Statistics: Num rows: 12539978782 Data size: 190607677805 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE top n: 100 Group By Operator aggregations: sum(_col8) keys: _col2 (type: double), _col3 (type: string), _col0 (type: bigint), _col4 (type: bigint), _col5 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12539978782 Data size: 190607677805 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: string) sort order: -++++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: string) - Statistics: Num rows: 12539978782 Data size: 190607677805 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col5 (type: double) - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -409,11 +425,11 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: bigint), KEY._col3 (type: bigint), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3134994695 Data size: 47651919443 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col4 (type: string), _col3 (type: bigint), _col2 (type: bigint), _col1 (type: string), _col0 (type: double), _col5 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3134994695 Data size: 47651919443 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE @@ -439,7 +455,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -454,7 +470,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/skewjoin.q.out b/ql/src/test/results/clientpositive/llap/skewjoin.q.out index 77cd11730f..f694c1c542 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoin.q.out @@ -263,7 +263,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -283,7 +285,7 @@ STAGE PLANS: value expressions: val (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: b @@ -300,7 +302,7 @@ STAGE PLANS: value expressions: val (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 6 Map Operator Tree: TableScan alias: c @@ -317,7 +319,7 @@ STAGE PLANS: value expressions: val (type: string) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: d @@ -340,22 +342,52 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - 3 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col11 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col10 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -419,7 +451,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -439,7 +473,7 @@ STAGE PLANS: value expressions: val (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: b @@ -456,7 +490,7 @@ STAGE PLANS: value expressions: val (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 6 Map Operator Tree: TableScan alias: c @@ -473,7 +507,7 @@ STAGE PLANS: value expressions: val (type: string) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: d @@ -496,22 +530,52 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - 3 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col11 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col10 (type: string) + 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -955,15 +1019,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src - filterExpr: ((key < 80) and (key < 100)) (type: boolean) + filterExpr: (((key < 100) and (key < 80)) or (key < 80)) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 100) and (key < 80)) (type: boolean) @@ -977,27 +1042,18 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src - filterExpr: ((key < 100) and (key < 80)) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 100) and (key < 80)) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < 80) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 @@ -1005,19 +1061,20 @@ STAGE PLANS: TableScan alias: src filterExpr: ((key < 100) and (key < 80)) (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 100) and (key < 80)) (type: boolean) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1026,11 +1083,26 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col3 Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1042,7 +1114,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out index 07be7f683f..7ce4ba6212 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out @@ -1342,7 +1342,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1370,6 +1371,21 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + Map 5 Map Operator Tree: TableScan alias: a @@ -1382,26 +1398,33 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: llap + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out index 2520dde4b4..219a9ebe6b 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out @@ -201,8 +201,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 13 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 7 <- Map 14 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -225,10 +230,10 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 10 Map Operator Tree: TableScan - alias: b + alias: c filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -245,10 +250,10 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 11 Map Operator Tree: TableScan - alias: c + alias: d filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -265,10 +270,10 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 12 Map Operator Tree: TableScan - alias: d + alias: e filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -285,10 +290,10 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 13 Map Operator Tree: TableScan - alias: e + alias: f filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -305,10 +310,10 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 8 + Map 14 Map Operator Tree: TableScan - alias: f + alias: g filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -328,7 +333,7 @@ STAGE PLANS: Map 9 Map Operator Tree: TableScan - alias: g + alias: b filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -351,20 +356,90 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - Inner Join 0 to 4 - Inner Join 0 to 5 - Inner Join 0 to 6 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - 3 _col0 (type: int) - 4 _col0 (type: int) - 5 _col0 (type: int) - 6 _col0 (type: int) - Statistics: Num rows: 214 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 43 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 43 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 71 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 71 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 118 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 118 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 196 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -374,7 +449,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -472,93 +547,185 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Map 3 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE), Map 5 (CUSTOM_EDGE), Map 6 (CUSTOM_EDGE), Map 7 (CUSTOM_EDGE), Map 8 (CUSTOM_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 43 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 71 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 7 + Statistics: Num rows: 118 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 8 + Statistics: Num rows: 196 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 Map Operator Tree: TableScan - alias: e + alias: d Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 Map Operator Tree: TableScan - alias: f + alias: e Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 7 Map Operator Tree: TableScan - alias: g + alias: f Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 8 Map Operator Tree: TableScan - alias: a + alias: g Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 0 to 2 - Left Outer Join 0 to 3 - Left Outer Join 0 to 4 - Left Outer Join 0 to 5 - Left Outer Join 0 to 6 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - 3 _col0 (type: int) - 4 _col0 (type: int) - 5 _col0 (type: int) - 6 _col0 (type: int) - Statistics: Num rows: 214 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: llap + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: @@ -661,103 +828,211 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Map 3 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE), Map 5 (CUSTOM_EDGE), Map 6 (CUSTOM_EDGE), Map 7 (CUSTOM_EDGE), Map 8 (CUSTOM_EDGE), Map 9 (CUSTOM_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 43 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 71 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 7 + Statistics: Num rows: 118 Data size: 472 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 8 + Statistics: Num rows: 196 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 9 + Statistics: Num rows: 326 Data size: 2608 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 Map Operator Tree: TableScan - alias: e + alias: d Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 Map Operator Tree: TableScan - alias: f + alias: e Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 7 Map Operator Tree: TableScan - alias: g + alias: f Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 8 Map Operator Tree: TableScan - alias: h + alias: g Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 9 Map Operator Tree: TableScan - alias: a + alias: h Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 0 to 2 - Left Outer Join 0 to 3 - Left Outer Join 0 to 4 - Left Outer Join 0 to 5 - Left Outer Join 0 to 6 - Left Outer Join 0 to 7 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - 3 _col0 (type: int) - 4 _col0 (type: int) - 5 _col0 (type: int) - 6 _col0 (type: int) - 7 _col0 (type: int) - Statistics: Num rows: 357 Data size: 2856 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: llap + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: @@ -890,185 +1165,461 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) + Map 1 <- Map 10 (CUSTOM_EDGE), Map 11 (CUSTOM_EDGE), Map 12 (CUSTOM_EDGE), Map 13 (CUSTOM_EDGE), Map 14 (CUSTOM_EDGE), Map 15 (CUSTOM_EDGE), Map 16 (CUSTOM_EDGE), Map 17 (CUSTOM_EDGE), Map 18 (CUSTOM_EDGE), Map 19 (CUSTOM_EDGE), Map 2 (CUSTOM_EDGE), Map 20 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE), Map 5 (CUSTOM_EDGE), Map 6 (CUSTOM_EDGE), Map 7 (CUSTOM_EDGE), Map 8 (CUSTOM_EDGE), Map 9 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 26 Data size: 2418 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 4 + Statistics: Num rows: 43 Data size: 3999 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 71 Data size: 6603 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 6 + Statistics: Num rows: 118 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 7 + Statistics: Num rows: 196 Data size: 18228 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 8 + Statistics: Num rows: 326 Data size: 30318 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 9 + Statistics: Num rows: 543 Data size: 50499 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 10 + Statistics: Num rows: 905 Data size: 84165 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 11 + Statistics: Num rows: 1508 Data size: 140244 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 12 + Statistics: Num rows: 2513 Data size: 233709 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 13 + Statistics: Num rows: 4188 Data size: 389484 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 14 + Statistics: Num rows: 6980 Data size: 649140 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 15 + Statistics: Num rows: 11633 Data size: 1081869 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 16 + Statistics: Num rows: 19388 Data size: 1803084 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 17 + Statistics: Num rows: 32313 Data size: 3005109 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 18 + Statistics: Num rows: 53855 Data size: 5008515 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 19 + Statistics: Num rows: 89758 Data size: 8347494 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 20 + Statistics: Num rows: 149596 Data size: 13912428 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 149596 Data size: 13912428 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: j Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 11 Map Operator Tree: TableScan - alias: c + alias: k Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 12 Map Operator Tree: TableScan - alias: d + alias: l Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 13 Map Operator Tree: TableScan - alias: e + alias: m Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 14 Map Operator Tree: TableScan - alias: f + alias: n Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 15 Map Operator Tree: TableScan - alias: g + alias: o Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 16 Map Operator Tree: TableScan - alias: h + alias: p Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 17 Map Operator Tree: TableScan - alias: i + alias: q Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 18 Map Operator Tree: TableScan - alias: j + alias: r Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 19 Map Operator Tree: TableScan - alias: k + alias: s Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 2 Map Operator Tree: TableScan - alias: l + alias: b Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 20 Map Operator Tree: TableScan - alias: m + alias: t Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan - alias: n + alias: c Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan - alias: o + alias: d Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 Map Operator Tree: TableScan - alias: p + alias: e Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 0 to 2 - Left Outer Join 0 to 3 - Left Outer Join 0 to 4 - Left Outer Join 0 to 5 - Left Outer Join 0 to 6 - Left Outer Join 0 to 7 - Left Outer Join 0 to 8 - Left Outer Join 0 to 9 - Left Outer Join 0 to 10 - Left Outer Join 0 to 11 - Left Outer Join 0 to 12 - Left Outer Join 0 to 13 - Left Outer Join 0 to 14 - Left Outer Join 0 to 15 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 10 _col0 (type: int) - 11 _col0 (type: int) - 12 _col0 (type: int) - 13 _col0 (type: int) - 14 _col0 (type: int) - 15 _col0 (type: int) - 2 _col0 (type: int) - 3 _col0 (type: int) - 4 _col0 (type: int) - 5 _col0 (type: int) - 6 _col0 (type: int) - 7 _col0 (type: int) - 8 _col0 (type: int) - 9 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 21268 Data size: 1977924 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 21268 Data size: 1977924 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: llap - Map 18 - Map Operator Tree: - TableScan - alias: q + alias: f Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) @@ -1081,10 +1632,10 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 19 + Map 7 Map Operator Tree: TableScan - alias: r + alias: g Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) @@ -1097,10 +1648,10 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 20 + Map 8 Map Operator Tree: TableScan - alias: s + alias: h Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) @@ -1113,10 +1664,10 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 21 + Map 9 Map Operator Tree: TableScan - alias: t + alias: i Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) @@ -1129,30 +1680,6 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 0 to 2 - Left Outer Join 0 to 3 - Left Outer Join 0 to 4 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - 3 _col0 (type: int) - 4 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 164104 Data size: 15261672 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 164104 Data size: 15261672 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out index 67c10b46db..cf0663ed6c 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out @@ -67,12 +67,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: a + alias: b filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -84,7 +87,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b + alias: a filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -94,6 +97,22 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -106,24 +125,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -164,12 +191,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: a + alias: b filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -181,7 +211,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b + alias: a filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -191,6 +221,22 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -203,24 +249,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -261,6 +315,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -272,14 +329,6 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -291,21 +340,53 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -351,12 +432,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) @@ -364,12 +448,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -378,24 +478,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -444,43 +552,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap Map 4 Map Operator Tree: TableScan @@ -503,17 +609,15 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 - Full Outer Join 1 to 2 + Full Outer Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -569,9 +673,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: a @@ -596,6 +703,22 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -608,24 +731,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -668,6 +799,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 2 @@ -679,14 +813,6 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: b @@ -698,21 +824,53 @@ STAGE PLANS: Merge Join Operator condition map: Right Outer Join 0 to 1 - Left Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -757,9 +915,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: a @@ -776,6 +937,22 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -784,24 +961,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - Right Outer Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -850,10 +1035,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -862,15 +1047,6 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 Map Operator Tree: TableScan alias: b @@ -879,14 +1055,21 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap Map 4 Map Operator Tree: TableScan @@ -904,22 +1087,20 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 - Full Outer Join 1 to 2 + Full Outer Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -972,9 +1153,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: a @@ -999,6 +1183,22 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -1011,24 +1211,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1073,7 +1281,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1093,7 +1302,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1110,7 +1319,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -1133,16 +1342,31 @@ STAGE PLANS: Merge Join Operator condition map: Full Outer Join 0 to 1 - Left Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1198,7 +1422,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1218,7 +1443,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1235,7 +1460,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -1258,16 +1483,31 @@ STAGE PLANS: Merge Join Operator condition map: Full Outer Join 0 to 1 - Right Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1320,7 +1560,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1340,7 +1581,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1357,7 +1598,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -1380,16 +1621,31 @@ STAGE PLANS: Merge Join Operator condition map: Full Outer Join 0 to 1 - Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out index d33b3fa263..fa997c4fd6 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out @@ -67,12 +67,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: a + alias: b filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -84,7 +87,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b + alias: a filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -94,6 +97,22 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -106,24 +125,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -164,12 +191,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: a + alias: b filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -181,7 +211,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b + alias: a filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -191,6 +221,22 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -203,24 +249,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -261,6 +315,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -272,14 +329,6 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -291,21 +340,53 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -351,12 +432,15 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) @@ -364,12 +448,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -378,24 +478,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -444,43 +552,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap Map 4 Map Operator Tree: TableScan @@ -503,17 +609,15 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 - Full Outer Join 1 to 2 + Full Outer Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -569,9 +673,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: a @@ -596,6 +703,22 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -608,24 +731,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -668,6 +799,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 2 @@ -679,14 +813,6 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: b @@ -698,21 +824,53 @@ STAGE PLANS: Merge Join Operator condition map: Right Outer Join 0 to 1 - Left Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -757,9 +915,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: a @@ -776,6 +937,22 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -784,24 +961,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - Right Outer Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -850,10 +1035,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -862,15 +1047,6 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 Map Operator Tree: TableScan alias: b @@ -879,14 +1055,21 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap Map 4 Map Operator Tree: TableScan @@ -904,22 +1087,20 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 - Full Outer Join 1 to 2 + Full Outer Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -972,9 +1153,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 2 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: a @@ -999,6 +1183,22 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan alias: c @@ -1011,24 +1211,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1073,7 +1281,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1093,7 +1302,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1110,7 +1319,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -1133,16 +1342,31 @@ STAGE PLANS: Merge Join Operator condition map: Full Outer Join 0 to 1 - Left Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1198,7 +1422,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1218,7 +1443,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1235,7 +1460,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -1258,16 +1483,31 @@ STAGE PLANS: Merge Join Operator condition map: Full Outer Join 0 to 1 - Right Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1320,7 +1560,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1340,7 +1581,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1357,7 +1598,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -1380,16 +1621,31 @@ STAGE PLANS: Merge Join Operator condition map: Full Outer Join 0 to 1 - Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out index 2c6c493a15..6125f2a347 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out @@ -2769,6 +2769,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2786,7 +2789,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan - alias: c + alias: a filterExpr: (key > 1000) (type: boolean) Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -2796,9 +2799,25 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + Execution mode: llap + Map 4 Map Operator Tree: TableScan - alias: a + alias: c filterExpr: (key > 1000) (type: boolean) Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -2808,24 +2827,32 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index 19db05b152..b8d6230680 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -714,7 +714,7 @@ POSTHOOK: Input: default@part_null 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -732,11 +732,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -760,7 +761,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: part_null @@ -793,7 +794,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: part_null @@ -820,32 +821,47 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col3 (type: string) 1 _col0 (type: string) - 2 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: boolean) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12 - Statistics: Num rows: 2 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: bigint), _col12 (type: bigint), _col10 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 - Statistics: Num rows: 2 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col10 = 0L) or (_col13 is null and (_col11 >= _col10))) (type: boolean) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -863,7 +879,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -875,7 +891,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -892,7 +908,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 8 + Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -911,7 +927,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product PREHOOK: query: select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index b175aa97be..007161ec5a 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -6291,11 +6291,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Map 3 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -6315,7 +6316,7 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: t7 @@ -6351,7 +6352,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: fixob @@ -6378,13 +6379,28 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col1 (type: int) 1 _col0 (type: int) - 2 _col2 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col3 (type: bigint), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col6 - Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: CASE WHEN ((_col3 = 0L)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col6 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE @@ -6399,7 +6415,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -6414,7 +6430,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -6432,7 +6448,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -6449,7 +6465,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) - Reducer 8 + Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -6531,11 +6547,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -6583,7 +6600,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: t_n0 @@ -6610,13 +6627,28 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col1 (type: int) 1 _col0 (type: int) - 2 _col2 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col3 (type: bigint), _col4 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col6 - Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: CASE WHEN ((_col3 = 0L)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col6 is not null) THEN (false) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean) Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE @@ -6631,7 +6663,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -6646,7 +6678,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -6664,7 +6696,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -6681,7 +6713,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index 6499cf1176..bf9030a752 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -206,7 +206,8 @@ POSTHOOK: Input: default@part_null_n0 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[20][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_size > (select * from tempty_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -217,7 +218,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@tempty_n0 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[20][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_size > (select * from tempty_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -238,7 +240,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Map 4 (XPROD_EDGE), Map 5 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 3 <- Map 5 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 4 <- Map 6 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -259,7 +262,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: tempty_n0 @@ -274,7 +277,7 @@ STAGE PLANS: value expressions: _col0 (type: char(2)) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: part @@ -311,21 +314,34 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 95 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: char(2)) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 residual filter predicates: {(UDFToDouble(_col7) > UDFToDouble(_col1))} - Statistics: Num rows: 8 Data size: 5712 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 8 Data size: 5720 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5712 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 8 Data size: 5720 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 5712 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 8 Data size: 5720 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -337,7 +353,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n0 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -358,7 +375,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 4 <- Map 5 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -386,7 +404,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: part @@ -427,20 +445,31 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 + Statistics: Num rows: 1 Data size: 377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 377 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 876 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: int), null (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: double), _col9 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 876 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 876 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -452,7 +481,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_name = (select p_name from part_null_n0 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -981,7 +1011,8 @@ POSTHOOK: Input: default@part 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl 49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain select p_mfgr, p_name, p_size from part where part.p_size > (select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1) @@ -1006,8 +1037,9 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 7 (XPROD_EDGE), Reducer 4 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 6 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 6 <- Map 8 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 7 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1022,7 +1054,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: part @@ -1141,11 +1173,24 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 outputColumnNames: _col1, _col2, _col3, _col4 residual filter predicates: {(_col4 > _col1)} Statistics: Num rows: 8 Data size: 1816 Basic stats: COMPLETE Column stats: COMPLETE @@ -1160,7 +1205,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -1181,7 +1226,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: select p_mfgr, p_name, p_size from part where part.p_size > (select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1) @@ -2819,7 +2865,8 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### 2346.3 3461.37 -Warning: Shuffle Join MERGEJOIN[76][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[78][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2837,16 +2884,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) - Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 14 <- Reducer 12 (SIMPLE_EDGE), Union 8 (CONTAINS) + Reducer 10 <- Union 9 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) + Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) + Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 15 <- Reducer 13 (SIMPLE_EDGE), Union 9 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 5 <- Union 4 (SIMPLE_EDGE) - Reducer 6 <- Map 15 (XPROD_EDGE), Reducer 10 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) - Reducer 7 <- Reducer 2 (SIMPLE_EDGE), Union 8 (CONTAINS) - Reducer 9 <- Union 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 11 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 7 <- Map 16 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Union 9 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -2869,7 +2917,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 11 + Map 12 Map Operator Tree: TableScan alias: part @@ -2889,7 +2937,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 15 + Map 16 Map Operator Tree: TableScan alias: part @@ -2905,6 +2953,33 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 = 2L) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 11 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2920,7 +2995,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 12 + Reducer 13 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2946,7 +3021,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 13 + Reducer 14 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2967,7 +3042,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 14 + Reducer 15 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3061,11 +3136,24 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 residual filter predicates: {(UDFToLong(_col7) > _col0)} Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE @@ -3080,7 +3168,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3101,37 +3189,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 = 2L) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Union 4 Vertex: Union 4 - Union 8 - Vertex: Union 8 + Union 9 + Vertex: Union 9 Stage: Stage-0 Fetch Operator @@ -3139,7 +3200,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[76][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[78][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 7' is a cross product PREHOOK: query: select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4267,9 +4329,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4293,7 +4356,7 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: part @@ -4313,7 +4376,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: lineitem @@ -4342,18 +4405,33 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col5 residual filter predicates: {(_col1 > _col5)} - Statistics: Num rows: 8 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: double) outputColumnNames: _col2 - Statistics: Num rows: 8 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2) mode: hash @@ -4363,7 +4441,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -4378,7 +4456,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -5736,7 +5814,8 @@ having count(*) > (select count(*) from src s1 where s1.key > '9' ) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_size > (select max(p_size) from part group by p_type) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -5755,9 +5834,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (XPROD_EDGE), Reducer 2 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 4 <- Map 8 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5783,7 +5863,7 @@ STAGE PLANS: value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: part @@ -5804,7 +5884,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: part @@ -5842,11 +5922,24 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 residual filter predicates: {(_col7 > _col0)} Statistics: Num rows: 112 Data size: 69776 Basic stats: COMPLETE Column stats: COMPLETE @@ -5861,7 +5954,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -5880,7 +5973,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -5920,9 +6013,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5946,7 +6040,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: p @@ -5967,7 +6061,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: p @@ -5996,26 +6090,41 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col4 (type: string) 1 _col0 (type: string) - 2 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 30 Data size: 18570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 30 Data size: 18570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 residual filter predicates: {(_col5 > _col10)} - Statistics: Num rows: 4 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 5607 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 5571 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 5571 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -6041,7 +6150,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 4 Data size: 416 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -7225,7 +7334,8 @@ HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Shuffle Join MERGEJOIN[73][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[74][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[76][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain cbo with avg_sales as (select avg(quantity*list_price) over( partition by list_price) average_sales from (select ss_quantity quantity diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index a0b03df3ba..629ac5075b 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -4238,7 +4238,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### true -Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select o.p_size, (select count(distinct p_type) from part p where p.p_partkey = o.p_partkey) tmp FROM part o right join (select * from part where p_size > (select avg(p_size) from part)) t on t.p_partkey = o.p_partkey PREHOOK: type: QUERY @@ -4258,10 +4258,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4289,7 +4290,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: part @@ -4309,7 +4310,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: p @@ -4336,25 +4337,40 @@ STAGE PLANS: Merge Join Operator condition map: Right Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 16 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) outputColumnNames: _col1, _col3, _col4 - Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), CASE WHEN (_col4 is null) THEN (0) ELSE (_col3) END (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -4375,7 +4391,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -4391,7 +4407,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -4422,7 +4438,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select o.p_size, (select count(distinct p_type) from part p where p.p_partkey = o.p_partkey) tmp FROM part o right join (select * from part where p_size > (select avg(p_size) from part)) t on t.p_partkey = o.p_partkey PREHOOK: type: QUERY @@ -5201,7 +5217,7 @@ POSTHOOK: Input: default@part 6 28 6 28 7 28 -Warning: Shuffle Join MERGEJOIN[137][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[144][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type @@ -5229,16 +5245,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 7 (SIMPLE_EDGE) - Reducer 11 <- Map 13 (SIMPLE_EDGE), Reducer 10 (ONE_TO_ONE_EDGE) - Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) - Reducer 9 <- Map 7 (SIMPLE_EDGE) + Reducer 10 <- Map 8 (SIMPLE_EDGE) + Reducer 11 <- Map 8 (SIMPLE_EDGE) + Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (ONE_TO_ONE_EDGE) + Reducer 13 <- Map 15 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Reducer 13 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5273,7 +5290,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 13 + Map 14 Map Operator Tree: TableScan alias: p @@ -5294,7 +5311,7 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 14 + Map 15 Map Operator Tree: TableScan alias: pp @@ -5314,7 +5331,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: t2 @@ -5358,6 +5375,25 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 11 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -5383,7 +5419,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 11 + Reducer 12 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -5400,7 +5436,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 30 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) - Reducer 12 + Reducer 13 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -5422,18 +5458,33 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) outputColumnNames: _col1, _col3 - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 59 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 59 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col3 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -5443,19 +5494,19 @@ STAGE PLANS: 0 1 outputColumnNames: _col1, _col3, _col5 - Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 59 Data size: 948 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col3 (type: bigint), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 59 Data size: 948 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 59 Data size: 948 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -5480,7 +5531,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -5499,7 +5550,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -5511,7 +5562,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 8 + Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -5537,25 +5588,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Stage: Stage-0 Fetch Operator @@ -5563,7 +5595,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[137][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[144][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type diff --git a/ql/src/test/results/clientpositive/llap/subquery_views.q.out b/ql/src/test/results/clientpositive/llap/subquery_views.q.out index aa50b468c3..418238236d 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_views.q.out @@ -108,14 +108,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 9 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 10 <- Map 1 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 5 <- Map 1 (SIMPLE_EDGE) - Reducer 6 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) - Reducer 8 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) + Reducer 7 <- Reducer 12 (ONE_TO_ONE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) + Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 10 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -195,7 +197,7 @@ STAGE PLANS: value expressions: _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 9 + Map 11 Map Operator Tree: TableScan alias: b @@ -219,6 +221,21 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27 Data size: 5238 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 27 Data size: 5238 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -237,27 +254,42 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - 2 _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 193 Data size: 34802 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 193 Data size: 34802 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col4 (type: bigint), _col5 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col4, _col5, _col8 - Statistics: Num rows: 27 Data size: 5346 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 386 Data size: 73020 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 193 Data size: 36518 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2314 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 193 Data size: 34354 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 2314 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 193 Data size: 34354 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -267,15 +299,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2314 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 193 Data size: 34354 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 2314 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 193 Data size: 34354 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -290,7 +322,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 27 Data size: 5238 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -308,7 +340,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 27 Data size: 4914 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -331,51 +363,51 @@ STAGE PLANS: Map-reduce partition columns: _col3 (type: string), _col1 (type: string) Statistics: Num rows: 27 Data size: 4914 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - 2 _col3 (type: string), _col1 (type: string) + 1 _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col4, _col5, _col8 - Statistics: Num rows: 27 Data size: 2889 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 319 Data size: 30993 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 13 Data size: 1391 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 159 Data size: 15453 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1131 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 159 Data size: 13833 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9 Data size: 783 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 - Execution mode: vectorized, llap + Statistics: Num rows: 9 Data size: 783 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 9 + Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 5238 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 193 Data size: 34802 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 27 Data size: 5238 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint), _col3 (type: bigint) + Statistics: Num rows: 193 Data size: 34802 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col4 (type: bigint), _col5 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out index 7e09d5e7fb..c1156759d2 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out @@ -37,8 +37,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -62,7 +63,7 @@ STAGE PLANS: value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -80,17 +81,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToInteger(key) + 0) (type: int) outputColumnNames: _col0 @@ -108,28 +98,43 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col12 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col12 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -237,8 +242,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -262,7 +268,7 @@ STAGE PLANS: value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -280,17 +286,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToInteger(key) + 0) (type: int) outputColumnNames: _col0 @@ -308,28 +303,43 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col12 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col12 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -437,8 +447,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -462,7 +473,7 @@ STAGE PLANS: value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -480,17 +491,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToInteger(key) + 0) (type: int) outputColumnNames: _col0 @@ -508,28 +508,43 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col12 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col12 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out index db16daef74..95fb68bdd7 100644 --- a/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out @@ -559,7 +559,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -575,18 +576,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE - Map Operator Tree: - TableScan - alias: s3 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Map Operator Tree: TableScan alias: s1 @@ -602,23 +591,57 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 532 Data size: 2129 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 382 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 382 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap + Map 5 + Map Operator Tree: + TableScan + alias: s3 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: unknown Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 420 Data size: 1680 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -676,7 +699,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -694,7 +718,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Map Operator Tree: TableScan - alias: s3 + alias: s1 filterExpr: key is not null (type: boolean) Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -704,9 +728,24 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 266 Data size: 1064 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 266 Data size: 1064 Basic stats: PARTIAL Column stats: NONE + Execution mode: llap + Map 5 Map Operator Tree: TableScan - alias: s1 + alias: s3 filterExpr: key is not null (type: boolean) Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -716,26 +755,33 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 532 Data size: 2129 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: llap + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 292 Data size: 1170 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_reduce_side.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_reduce_side.q.out index 5f8cbafb2d..c58a28d024 100644 --- a/ql/src/test/results/clientpositive/llap/tez_smb_reduce_side.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_smb_reduce_side.q.out @@ -594,9 +594,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -628,7 +629,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: t2 @@ -667,28 +668,43 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col2 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) outputColumnNames: _col0, _col2, _col3, _col5 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: CASE WHEN ((_col2 = 0L)) THEN (true) WHEN (_col2 is null) THEN (true) WHEN (_col5 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -720,7 +736,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -798,9 +814,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -832,7 +849,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: t2 @@ -871,28 +888,43 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col2 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) outputColumnNames: _col0, _col2, _col3, _col5 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: CASE WHEN ((_col2 = 0L)) THEN (true) WHEN (_col2 is null) THEN (true) WHEN (_col5 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -924,7 +956,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_union.q.out b/ql/src/test/results/clientpositive/llap/tez_union.q.out index 875b75f3f5..5539663fdb 100644 --- a/ql/src/test/results/clientpositive/llap/tez_union.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_union.q.out @@ -586,25 +586,32 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 input vertices: 1 Map 9 - 2 Map 10 - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 3754 Data size: 653196 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) + input vertices: + 1 Map 10 + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 10 @@ -683,25 +690,32 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 input vertices: 1 Map 9 - 2 Map 10 - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 3754 Data size: 653196 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) + input vertices: + 1 Map 10 + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 6 @@ -750,25 +764,32 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 input vertices: 1 Map 9 - 2 Map 10 - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) + Statistics: Num rows: 3754 Data size: 653196 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) + input vertices: + 1 Map 10 + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 8 @@ -827,10 +848,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5941 Data size: 1550601 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5939 Data size: 1550079 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out index 7e09d5e7fb..c1156759d2 100644 --- a/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out @@ -37,8 +37,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -62,7 +63,7 @@ STAGE PLANS: value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -80,17 +81,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToInteger(key) + 0) (type: int) outputColumnNames: _col0 @@ -108,28 +98,43 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col12 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col12 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -237,8 +242,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -262,7 +268,7 @@ STAGE PLANS: value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -280,17 +286,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToInteger(key) + 0) (type: int) outputColumnNames: _col0 @@ -308,28 +303,43 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col12 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col12 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -437,8 +447,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -462,7 +473,7 @@ STAGE PLANS: value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -480,17 +491,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToInteger(key) + 0) (type: int) outputColumnNames: _col0 @@ -508,28 +508,43 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col12 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col12 (type: int) + sort order: + + Map-reduce partition columns: _col12 (type: int) + Statistics: Num rows: 500 Data size: 40234 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col12 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 791 Data size: 6700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 791 Data size: 130440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 29e93c017d..6b976d8386 100644 --- a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -1655,7 +1655,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1683,41 +1684,89 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Map 5 Map Operator Tree: TableScan alias: a filterExpr: (key < 6) (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0:int, val 6) predicate: (key < 6) (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 2 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index 3a56c0d329..cd1e01d1ac 100644 --- a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -670,7 +670,9 @@ b str two line1 four line2 six line3 -Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 4' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL INSERT INTO TABLE orc_create_complex_n0 SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join src src1 cross join orc_create_staging_n0 spam1 cross join orc_create_staging_n0 spam2 @@ -701,7 +703,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 4 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 4 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -720,15 +723,47 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:string, 1:map, 2:array, 3:struct - Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:string, col 1:map, col 2:array, col 3:struct + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Supports Value Types [MAP, LIST, STRUCT] IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 1 Data size: 3445 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:string, col 1:map, col 2:array, col 3:struct + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Supports Value Types [MAP, LIST, STRUCT] IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 3450 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:string, 1:map, 2:array, 3:struct + Statistics: Num rows: 1 Data size: 3450 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) Execution mode: vectorized, llap Map Vectorization: enabled: true @@ -736,7 +771,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -838,39 +873,37 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 keys: 0 1 - 2 - 3 Map Join Vectorization: - bigTableValueExpressions: col 0:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:string, 4:map, 5:array, 6:struct, 0:string + smallTableValueMapping: 3:string, 4:map, 5:array, 6:struct + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col6 input vertices: 0 Map 1 - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 1769000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4] - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [3, 4, 5, 6, 0] + Statistics: Num rows: 500 Data size: 1769000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 1769000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -910,7 +943,9 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 4' is a cross product PREHOOK: query: INSERT INTO TABLE orc_create_complex_n0 SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join src src1 cross join orc_create_staging_n0 spam1 cross join orc_create_staging_n0 spam2 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out b/ql/src/test/results/clientpositive/llap/vector_join30.q.out index 7c873c14a9..b2ddb235d0 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -938,44 +938,64 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Map Join Vectorization: - bigTableKeyExpressions: col 0:string - bigTableValueExpressions: col 0:string, col 1:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col2, _col3 + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string + className: VectorMapJoinInnerBigOnlyStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 0:string, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col2, _col3 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 0:bigint + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string + className: VectorMapJoinInnerBigOnlyStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -992,7 +1012,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [bigint] Map 4 Map Operator Tree: TableScan @@ -1128,36 +1148,44 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1175,34 +1203,90 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 2 Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0, 1] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string + className: VectorMapJoinInnerBigOnlyStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 0:string, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1211,16 +1295,16 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + scratchColumnTypeNames: [bigint] + Map 4 Map Operator Tree: TableScan alias: orcsrc_n0 @@ -1263,31 +1347,6 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1372,8 +1431,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1392,16 +1451,67 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 0:string, 3:string, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [3, 4] + bigTableValueColumns: 3:string, 4:string + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 3:string, 4:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1410,16 +1520,16 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + scratchColumnTypeNames: [string, string, bigint] + Map 3 Map Operator Tree: TableScan alias: orcsrc_n0 @@ -1464,7 +1574,7 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc_n0 @@ -1508,31 +1618,6 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1616,8 +1701,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1636,16 +1722,39 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 0:string, 3:string, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:string, 4:string + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1662,8 +1771,8 @@ STAGE PLANS: includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + scratchColumnTypeNames: [string, string] + Map 2 Map Operator Tree: TableScan alias: orcsrc_n0 @@ -1708,7 +1817,7 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 + Map 3 Map Operator Tree: TableScan alias: orcsrc_n0 @@ -1724,16 +1833,46 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 3:string, 4:string + smallTableValueMapping: 3:string, 4:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1742,41 +1881,16 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 + scratchColumnTypeNames: [string, string, bigint] + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1860,8 +1974,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Map 3 <- Map 2 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1907,7 +2022,7 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 2 Map Operator Tree: TableScan alias: orcsrc_n0 @@ -1923,18 +2038,38 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 3:string, 0:string, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 3:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:string, 1:string + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1951,8 +2086,8 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + scratchColumnTypeNames: [string] + Map 3 Map Operator Tree: TableScan alias: orcsrc_n0 @@ -1968,16 +2103,46 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 3:string, 4:string + smallTableValueMapping: 3:string, 4:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 2 + Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1986,41 +2151,16 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Right Outer Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 + scratchColumnTypeNames: [string, string, bigint] + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -2104,7 +2244,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 4 <- Map 1 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2116,6 +2257,31 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -2155,30 +2321,58 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0, 1] + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string + className: VectorMapJoinInnerBigOnlyStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 0:string, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:string, 1:string + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2196,62 +2390,17 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 - Map Operator Tree: - TableScan - alias: orcsrc_n0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 - Full Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1291 Data size: 229798 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash @@ -2348,81 +2497,49 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: orcsrc_n0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 - Map Operator Tree: - TableScan - alias: orcsrc_n0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator + className: VectorReduceSinkObjectHashOperator keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + partitionColumns: 0:string + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: 0:string + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2436,7 +2553,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -2444,28 +2561,31 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orcsrc_n0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator + className: VectorReduceSinkObjectHashOperator keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + partitionColumns: 0:string + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2479,7 +2599,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -2489,13 +2609,31 @@ STAGE PLANS: Merge Join Operator condition map: Full Outer Join 0 to 1 - Full Outer Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1000 Data size: 265000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 265000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1582 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash @@ -2508,7 +2646,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -2592,7 +2730,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2733,22 +2871,30 @@ STAGE PLANS: Merge Join Operator condition map: Full Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1000 Data size: 265000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 1582 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -2836,7 +2982,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2856,16 +3003,39 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 0:string, 3:string, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:string, 4:string + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2882,7 +3052,7 @@ STAGE PLANS: includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [string, string] Map 4 Map Operator Tree: TableScan @@ -2976,14 +3146,12 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 - Full Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1291 Data size: 229798 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash @@ -3080,8 +3248,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 4 <- Reducer 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3127,7 +3296,7 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: orcsrc_n0 @@ -3172,7 +3341,7 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc_n0 @@ -3188,16 +3357,46 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 3:string, 4:string + smallTableValueMapping: 3:string, 4:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col2, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 1582 Data size: 281596 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3206,41 +3405,36 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [string, string, bigint] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Full Outer Join 0 to 1 - Right Outer Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 1000 Data size: 265000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 265000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 + Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -3324,7 +3518,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 4 <- Map 1 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -3354,39 +3549,14 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 - Map Operator Tree: - TableScan - alias: orcsrc_n0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -3396,9 +3566,7 @@ STAGE PLANS: keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3412,36 +3580,58 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc_n0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 3:string, 0:string, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 3:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:string, 1:string + Statistics: Num rows: 791 Data size: 209615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3455,23 +3645,21 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [string] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 - Full Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col2, _col3 - Statistics: Num rows: 1251 Data size: 222678 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1291 Data size: 229798 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash diff --git a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out index 6d49db9447..4b8c4b8c98 100644 --- a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out @@ -200,41 +200,56 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int) 1 value (type: int) - 2 key (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 2 - 2 Map 3 - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int, col 3:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -369,42 +384,57 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int) 1 value (type: int) - 2 key (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false nullSafes: [true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 2 - 2 Map 3 - Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 12 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int, col 3:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -558,42 +588,57 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int, col 1:int bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false nullSafes: [true, false] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 2 - 2 Map 3 - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 key (type: int), value (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int, col 3:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false + nullSafes: [true, false] + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -726,42 +771,57 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int, col 1:int bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false nullSafes: [true, true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 2 - 2 Map 3 - Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 12 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 key (type: int), value (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int, col 3:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false + nullSafes: [true, true] + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1238,41 +1298,54 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int) 1 value (type: int) - 2 key (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 2 - 2 Map 3 - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 0, 4] Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1407,42 +1480,57 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int) 1 value (type: int) - 2 key (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false nullSafes: [true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 2 - 2 Map 3 - Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 12 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int, col 3:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1596,42 +1684,57 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int, col 1:int bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false nullSafes: [true, false] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 2 - 2 Map 3 - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 key (type: int), value (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int, col 3:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false + nullSafes: [true, false] + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1764,42 +1867,57 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int, col 1:int bigTableValueExpressions: col 0:int, col 1:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false nullSafes: [true, true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 2 - 2 Map 3 - Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 12 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 key (type: int), value (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int, col 3:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false + nullSafes: [true, true] + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index 368173999c..b502dc3c21 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -776,10 +776,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -810,7 +811,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE @@ -825,7 +826,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -853,7 +854,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE @@ -885,46 +886,12 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Map 6 - Map Operator Tree: - TableScan - alias: c - filterExpr: key_int is not null (type: boolean) - Statistics: Num rows: 57 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 1:int) - predicate: key_int is not null (type: boolean) - Statistics: Num rows: 57 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key_int (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 57 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE @@ -936,7 +903,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -945,11 +912,28 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 94 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 94 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 94 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -963,7 +947,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -994,7 +978,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: enabled: true diff --git a/ql/src/test/results/clientpositive/mapjoin47.q.out b/ql/src/test/results/clientpositive/mapjoin47.q.out index 67b2f4cd03..2ccce761b2 100644 --- a/ql/src/test/results/clientpositive/mapjoin47.q.out +++ b/ql/src/test/results/clientpositive/mapjoin47.q.out @@ -1729,7 +1729,11 @@ POSTHOOK: Input: default@src1 11 val_11 NULL NULL 66 val_66 12 val_12 NULL NULL 66 val_66 12 val_12 NULL NULL 66 val_66 -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-6:MAPRED' is a cross product +Warning: Map Join MAPJOIN[40][bigTable=?] in task 'Stage-8:MAPRED' is a cross product +Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Stage-9:MAPRED' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product +Warning: Map Join MAPJOIN[50][bigTable=?] in task 'Stage-11:MAPRED' is a cross product +Warning: Map Join MAPJOIN[51][bigTable=?] in task 'Stage-11:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM cbo_t1 t1 @@ -1755,26 +1759,28 @@ POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-16 is a root stage + Stage-11 depends on stages: Stage-16 + Stage-10 depends on stages: Stage-11, Stage-13 , consists of Stage-14, Stage-15, Stage-3 + Stage-14 has a backup stage: Stage-3 + Stage-8 depends on stages: Stage-14 + Stage-15 has a backup stage: Stage-3 + Stage-9 depends on stages: Stage-15 + Stage-3 + Stage-17 is a root stage + Stage-13 depends on stages: Stage-17 + Stage-0 depends on stages: Stage-8, Stage-9, Stage-3 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:t5 Fetch Operator limit: -1 - $hdt$_1:t3 - Fetch Operator - limit: -1 $hdt$_2:t4 Fetch Operator limit: -1 - $hdt$_3:$hdt$_3:t1 - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:t5 TableScan @@ -1788,11 +1794,9 @@ STAGE PLANS: keys: 0 1 - 2 - 3 - $hdt$_1:t3 + $hdt$_2:t4 TableScan - alias: t3 + alias: t4 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) @@ -1802,22 +1806,179 @@ STAGE PLANS: keys: 0 1 - 2 - 3 - $hdt$_2:t4 + + Stage: Stage-11 + Map Reduce + Map Operator Tree: TableScan - alias: t4 + alias: t3 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 1 - 2 - 3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 400 Data size: 294036 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 8000 Data size: 8815454 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-10 + Conditional Operator + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 + residual filter predicates: {((_col0 = _col25) or ((_col29 = _col17) and _col6))} {((_col13 = _col25) or ((_col28 = _col16) and (_col14 = _col26)))} {((_col7 = _col25) or ((_col8 = _col26) and (_col27 = _col21)))} + Statistics: Num rows: 337500 Data size: 618975000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col19 (type: string), _col20 (type: string), _col21 (type: int), _col22 (type: float), _col23 (type: boolean), _col24 (type: string), _col25 (type: string), _col26 (type: string), _col27 (type: int), _col28 (type: float), _col29 (type: boolean), _col30 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: float), _col11 (type: boolean), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: int), _col16 (type: float), _col17 (type: boolean), _col18 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 337500 Data size: 617625000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 18300 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 18300 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 + residual filter predicates: {((_col0 = _col25) or ((_col29 = _col17) and _col6))} {((_col13 = _col25) or ((_col28 = _col16) and (_col14 = _col26)))} {((_col7 = _col25) or ((_col8 = _col26) and (_col27 = _col21)))} + Statistics: Num rows: 337500 Data size: 618975000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col19 (type: string), _col20 (type: string), _col21 (type: int), _col22 (type: float), _col23 (type: boolean), _col24 (type: string), _col25 (type: string), _col26 (type: string), _col27 (type: int), _col28 (type: float), _col29 (type: boolean), _col30 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: float), _col11 (type: boolean), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: int), _col16 (type: float), _col17 (type: boolean), _col18 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 337500 Data size: 617625000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 18300 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 18300 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 8000 Data size: 8815454 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: boolean), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: float), _col11 (type: boolean), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: int), _col16 (type: float), _col17 (type: boolean), _col18 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 100 Data size: 72921 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: float), _col10 (type: boolean), _col11 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 + residual filter predicates: {((_col0 = _col25) or ((_col29 = _col17) and _col6))} {((_col13 = _col25) or ((_col28 = _col16) and (_col14 = _col26)))} {((_col7 = _col25) or ((_col8 = _col26) and (_col27 = _col21)))} + Statistics: Num rows: 337500 Data size: 618975000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col19 (type: string), _col20 (type: string), _col21 (type: int), _col22 (type: float), _col23 (type: boolean), _col24 (type: string), _col25 (type: string), _col26 (type: string), _col27 (type: int), _col28 (type: float), _col29 (type: boolean), _col30 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: float), _col11 (type: boolean), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: int), _col16 (type: float), _col17 (type: boolean), _col18 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 + Statistics: Num rows: 337500 Data size: 617625000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 18300 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 18300 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-17 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_3:$hdt$_3:t1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: $hdt$_3:$hdt$_3:t1 TableScan alias: t1 @@ -1831,7 +1992,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -1849,33 +2010,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 100 Data size: 72921 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - keys: - 0 - 1 - 2 - 3 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 - residual filter predicates: {((_col0 = _col25) or ((_col29 = _col17) and _col6))} {((_col13 = _col25) or ((_col28 = _col16) and (_col14 = _col26)))} {((_col7 = _col25) or ((_col8 = _col26) and (_col27 = _col21)))} - Statistics: Num rows: 337500 Data size: 618975000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col19 (type: string), _col20 (type: string), _col21 (type: int), _col22 (type: float), _col23 (type: boolean), _col24 (type: string), _col25 (type: string), _col26 (type: string), _col27 (type: int), _col28 (type: float), _col29 (type: boolean), _col30 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: float), _col11 (type: boolean), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: int), _col16 (type: float), _col17 (type: boolean), _col18 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 - Statistics: Num rows: 337500 Data size: 617625000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 18300 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 18300 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized Local Work: Map Reduce Local Work @@ -1885,7 +2026,11 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-6:MAPRED' is a cross product +Warning: Map Join MAPJOIN[40][bigTable=?] in task 'Stage-8:MAPRED' is a cross product +Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Stage-9:MAPRED' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product +Warning: Map Join MAPJOIN[50][bigTable=?] in task 'Stage-11:MAPRED' is a cross product +Warning: Map Join MAPJOIN[51][bigTable=?] in task 'Stage-11:MAPRED' is a cross product PREHOOK: query: SELECT * FROM cbo_t1 t1 RIGHT OUTER JOIN cbo_t1 t2 ON (t2.key = t1.key) diff --git a/ql/src/test/results/clientpositive/mapjoin_hook.q.out b/ql/src/test/results/clientpositive/mapjoin_hook.q.out index 1334ffbed8..2e02814edf 100644 --- a/ql/src/test/results/clientpositive/mapjoin_hook.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_hook.q.out @@ -23,10 +23,10 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1_n171 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-6:MAPREDLOCAL -RUN: Stage-5:MAPRED +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-6:MAPRED RUN: Stage-0:MOVE -RUN: Stage-2:STATS +RUN: Stage-3:STATS PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1_n171 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') diff --git a/ql/src/test/results/clientpositive/mapjoin_subquery.q.out b/ql/src/test/results/clientpositive/mapjoin_subquery.q.out index 6e8a8efe91..07237da7de 100644 --- a/ql/src/test/results/clientpositive/mapjoin_subquery.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_subquery.q.out @@ -23,38 +23,37 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:y + $hdt$_0:z Fetch Operator limit: -1 $hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:y + $hdt$_0:z TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) $hdt$_2:x TableScan alias: x @@ -71,43 +70,48 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 62 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 62 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 62 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col1 + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -272,38 +276,37 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:y + $hdt$_0:z Fetch Operator limit: -1 $hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:y + $hdt$_0:z TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + alias: z + filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) $hdt$_2:x TableScan alias: x @@ -320,43 +323,48 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: z - filterExpr: ((ds = '2008-04-08') and (11.0D = 11.0D) and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 62 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 62 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 62 Data size: 10974 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col1 + Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out b/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out index 2d007866a5..e51b66d6f8 100644 --- a/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_subquery2.q.out @@ -87,12 +87,12 @@ POSTHOOK: Input: default@y POSTHOOK: Input: default@z #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:y @@ -118,7 +118,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) - 2 _col0 (type: int) $hdt$_2:z TableScan alias: z @@ -133,11 +132,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - 2 _col0 (type: int) + 0 _col3 (type: int) + 1 _col0 (type: int) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -154,24 +152,30 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: int) 1 _col1 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col2 (type: string), _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col2 (type: string), _col0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/mergejoin.q.out b/ql/src/test/results/clientpositive/mergejoin.q.out index 5cbd4230a3..84bcd42a7e 100644 --- a/ql/src/test/results/clientpositive/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/mergejoin.q.out @@ -3358,7 +3358,8 @@ POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08 #### A masked pattern was here #### NULL NULL NULL 98 val_98 2008-04-08 NULL NULL NULL 98 val_98 2008-04-08 -Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from (select * from tab_n9 where tab_n9.key = 0)a full outer join diff --git a/ql/src/test/results/clientpositive/mergejoins.q.out b/ql/src/test/results/clientpositive/mergejoins.q.out index ba48f44ddc..aa35914016 100644 --- a/ql/src/test/results/clientpositive/mergejoins.q.out +++ b/ql/src/test/results/clientpositive/mergejoins.q.out @@ -57,7 +57,9 @@ POSTHOOK: Input: default@e_n0 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -97,6 +99,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) TableScan alias: c_n0 filterExpr: val1 is not null (type: boolean) @@ -114,6 +142,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) TableScan alias: d_n0 filterExpr: val1 is not null (type: boolean) @@ -135,15 +189,11 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - 3 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -151,7 +201,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -159,7 +209,7 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) TableScan alias: e_n0 @@ -186,10 +236,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -211,7 +261,8 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -251,6 +302,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 791 Data size: 284760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 284760 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: boolean), _col3 (type: string), _col4 (type: string) TableScan alias: c_n0 filterExpr: key is not null (type: boolean) @@ -271,16 +348,13 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Left Outer Join 0 to 2 + Left Outer Join 0 to 1 filter predicates: 0 {VALUE._col1} 1 - 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 1251 Data size: 668034 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git a/ql/src/test/results/clientpositive/mergejoins_mixed.q.out b/ql/src/test/results/clientpositive/mergejoins_mixed.q.out index 007cb00b59..31feb8da94 100644 --- a/ql/src/test/results/clientpositive/mergejoins_mixed.q.out +++ b/ql/src/test/results/clientpositive/mergejoins_mixed.q.out @@ -18,7 +18,9 @@ POSTHOOK: Input: default@a_n5 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -58,6 +60,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: c filterExpr: key is not null (type: boolean) @@ -75,6 +103,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: d filterExpr: key is not null (type: boolean) @@ -95,19 +149,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Left Outer Join 1 to 2 - Left Outer Join 0 to 3 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - 3 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -131,7 +181,9 @@ POSTHOOK: Input: default@a_n5 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -171,6 +223,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: c filterExpr: key is not null (type: boolean) @@ -188,6 +266,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: d Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE @@ -204,19 +308,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Left Outer Join 1 to 2 - Right Outer Join 0 to 3 + Right Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - 3 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -240,7 +340,9 @@ POSTHOOK: Input: default@a_n5 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -280,6 +382,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: c Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE @@ -293,6 +421,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: d Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE @@ -309,19 +463,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 1 to 2 - Left Outer Join 0 to 3 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - 3 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -345,7 +495,9 @@ POSTHOOK: Input: default@a_n5 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -385,6 +537,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: c Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE @@ -398,6 +576,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: d Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE @@ -414,19 +618,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 1 to 2 - Right Outer Join 0 to 3 + Right Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - 3 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1540,7 +1740,8 @@ POSTHOOK: Input: default@a_n5 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1619,6 +1820,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: string) TableScan alias: d Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE @@ -1636,16 +1863,14 @@ STAGE PLANS: Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 1 to 2 keys: - 0 _col3 (type: string) + 0 _col4 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/optimize_filter_literal.q.out b/ql/src/test/results/clientpositive/optimize_filter_literal.q.out index 3e8ca3a7f9..6eb573a400 100644 --- a/ql/src/test/results/clientpositive/optimize_filter_literal.q.out +++ b/ql/src/test/results/clientpositive/optimize_filter_literal.q.out @@ -136,7 +136,8 @@ POSTHOOK: Input: default@tab_n14@ds=2008-04-08 POSTHOOK: Output: default@tab_n14 POSTHOOK: Output: default@tab_n14@ds=2008-04-08 #### A masked pattern was here #### -Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from (select * from tab_n14 where tab_n14.key = 0)a full outer join diff --git a/ql/src/test/results/clientpositive/ppd_join3.q.out b/ql/src/test/results/clientpositive/ppd_join3.q.out index 5f7498d3f7..6b19607e80 100644 --- a/ql/src/test/results/clientpositive/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/ppd_join3.q.out @@ -28,7 +28,8 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src filterExpr: ((key <> '12') and (key <> '11') and (key < '400') and (key <> '13') and (key <> '4') and (key > '0') and (key <> '1')) (type: boolean) @@ -87,11 +113,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1746,7 +1770,8 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1784,6 +1809,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src filterExpr: ((key <> '12') and (key <> '11') and (key < '400') and (key <> '13') and (key <> '4') and (key > '0') and (key <> '1')) (type: boolean) @@ -1805,11 +1855,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: - 0 _col0 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator diff --git a/ql/src/test/results/clientpositive/ppd_outer_join4.q.out b/ql/src/test/results/clientpositive/ppd_outer_join4.q.out index 220ce7c0cf..d6e5988cf9 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join4.q.out @@ -28,7 +28,8 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -67,6 +68,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: b filterExpr: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) @@ -88,11 +115,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 55 Data size: 24365 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -399,7 +424,8 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -438,6 +464,32 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: b filterExpr: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) @@ -459,11 +511,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 55 Data size: 24365 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out index 91a69533fe..29dec2d863 100644 --- a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out +++ b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out @@ -35,39 +35,46 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_n94 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-14 is a root stage - Stage-10 depends on stages: Stage-14 - Stage-9 depends on stages: Stage-10 , consists of Stage-11, Stage-12, Stage-13, Stage-1 - Stage-11 has a backup stage: Stage-1 - Stage-6 depends on stages: Stage-11 - Stage-2 depends on stages: Stage-1, Stage-6, Stage-7, Stage-8 - Stage-12 has a backup stage: Stage-1 - Stage-7 depends on stages: Stage-12 - Stage-13 has a backup stage: Stage-1 - Stage-8 depends on stages: Stage-13 + Stage-12 is a root stage , consists of Stage-16, Stage-17, Stage-1 + Stage-16 has a backup stage: Stage-1 + Stage-10 depends on stages: Stage-16 + Stage-9 depends on stages: Stage-1, Stage-10, Stage-11, Stage-13 , consists of Stage-14, Stage-15, Stage-2 + Stage-14 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 + Stage-15 has a backup stage: Stage-2 + Stage-8 depends on stages: Stage-15 + Stage-2 + Stage-17 has a backup stage: Stage-1 + Stage-11 depends on stages: Stage-17 Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-18 is a root stage + Stage-13 depends on stages: Stage-18 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-14 + Stage: Stage-12 + Conditional Operator + + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:$hdt$_3:t1_n94 + $hdt$_1:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_3:t1_n94 + $hdt$_1:src2 TableScan - alias: t1_n94 + alias: src2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -77,7 +84,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src + alias: src1 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -94,7 +101,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -108,80 +115,47 @@ STAGE PLANS: Stage: Stage-9 Conditional Operator - Stage: Stage-11 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - $hdt$_1:src2 + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME + $INTNAME1 TableScan HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - $hdt$_1:src2 - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 870 Data size: 75698 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 1210 Data size: 105270 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -204,15 +178,12 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-12 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME Fetch Operator limit: -1 - $hdt$_0:src1 - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: $INTNAME TableScan @@ -220,72 +191,74 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - $hdt$_0:src1 - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 870 Data size: 75698 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 1210 Data size: 105270 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-13 + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 870 Data size: 75698 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 Fetch Operator limit: -1 - $hdt$_1:src2 - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:src1 TableScan @@ -303,8 +276,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - $hdt$_1:src2 + + Stage: Stage-11 + Map Reduce + Map Operator Tree: TableScan alias: src2 filterExpr: key is not null (type: boolean) @@ -316,36 +291,20 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 1210 Data size: 105270 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -385,33 +344,77 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 1210 Data size: 105270 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-18 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:$hdt$_3:t1_n94 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_2:$hdt$_3:t1_n94 + TableScan + alias: t1_n94 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoin.q.out b/ql/src/test/results/clientpositive/skewjoin.q.out index 01badd0925..4a84c33123 100644 --- a/ql/src/test/results/clientpositive/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/skewjoin.q.out @@ -309,7 +309,9 @@ POSTHOOK: Input: default@t4_n17 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -341,6 +343,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) TableScan alias: c filterExpr: key is not null (type: boolean) @@ -354,6 +382,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col11 (type: string) TableScan alias: d filterExpr: key is not null (type: boolean) @@ -371,22 +425,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 keys: - 0 key (type: string) + 0 _col10 (type: string) 1 key (type: string) - 2 key (type: string) - 3 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -443,7 +493,12 @@ POSTHOOK: Input: default@t4_n17 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-9, Stage-3 + Stage-9 + Stage-7 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -475,6 +530,32 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) TableScan alias: c filterExpr: key is not null (type: boolean) @@ -488,6 +569,70 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + handleSkewJoin: true + keys: + 0 _col5 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + 1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + 1 + TableScan + HashTable Sink Operator + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col10 (type: string) + sort order: + + Map-reduce partition columns: _col10 (type: string) + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col11 (type: string) TableScan alias: d filterExpr: key is not null (type: boolean) @@ -505,22 +650,18 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 keys: - 0 key (type: string) + 0 _col10 (type: string) 1 key (type: string) - 2 key (type: string) - 3 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1214 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1034,13 +1175,15 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-8, Stage-9, Stage-2 - Stage-8 - Stage-5 depends on stages: Stage-8 - Stage-2 depends on stages: Stage-5, Stage-6 - Stage-9 - Stage-6 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1 , consists of Stage-11, Stage-2 + Stage-11 + Stage-8 depends on stages: Stage-11 + Stage-2 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-2 , consists of Stage-10, Stage-3 + Stage-10 + Stage-6 depends on stages: Stage-10 + Stage-3 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1048,7 +1191,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key < 80) and (key < 100)) (type: boolean) + filterExpr: ((key < 100) and (key < 80)) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 100) and (key < 80)) (type: boolean) @@ -1079,32 +1222,94 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + handleSkewJoin: true + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 86 Data size: 15308 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Conditional Operator + + Stage: Stage-11 + Map Reduce Local Work + Alias -> Map Local Tables: + 1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + 1 + TableScan + HashTable Sink Operator + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + outputColumnNames: _col0, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 86 Data size: 15308 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: string) TableScan alias: src - filterExpr: ((key < 100) and (key < 80)) (type: boolean) + filterExpr: (key < 80) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 100) and (key < 80)) (type: boolean) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key < 80) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 4785 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 handleSkewJoin: true keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col3 Statistics: Num rows: 135 Data size: 24030 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1122,15 +1327,12 @@ STAGE PLANS: Stage: Stage-7 Conditional Operator - Stage: Stage-8 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: 1 Fetch Operator limit: -1 - 2 - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: 1 TableScan @@ -1138,27 +1340,17 @@ STAGE PLANS: keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - 2 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) outputColumnNames: _col0, _col3 Group By Operator aggregations: sum(hash(_col0)), sum(hash(_col3)) @@ -1174,7 +1366,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1196,58 +1388,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - 0 - Fetch Operator - limit: -1 - 2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - 0 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - 2 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - outputColumnNames: _col0, _col3 - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out index 54b8286b82..b3cc2a0f04 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out @@ -66,8 +66,8 @@ POSTHOOK: Input: default@t3_n27 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-10 + Stage-0 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-10 @@ -82,9 +82,6 @@ STAGE PLANS: subquery1:a Fetch Operator limit: -1 - subquery1:c - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: $hdt$_0:a TableScan @@ -102,14 +99,13 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) $hdt$_2:c TableScan alias: c filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key = '2') or (key = '8')) or (key = '3')) and key is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) @@ -119,7 +115,6 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) subquery1:a TableScan alias: a @@ -135,25 +130,8 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - subquery1:c - TableScan - alias: c - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((not (((key = '2') or (key = '8')) or (key = '3'))) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Stage: Stage-2 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -170,22 +148,28 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: b Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE @@ -199,22 +183,28 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out b/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out index 95c8fb0f65..40a5b298a0 100644 --- a/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out @@ -66,8 +66,9 @@ POSTHOOK: Input: default@t3_n2 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-4 is a root stage - Stage-0 depends on stages: Stage-1, Stage-4 + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-5 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -107,12 +108,40 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: c filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key = '2') or (key = '8')) or (key = '3')) and key is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) @@ -124,26 +153,33 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + TableScan + Union + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -178,40 +214,21 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - TableScan - alias: c - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((not (((key = '2') or (key = '8')) or (key = '3'))) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt7.q.out b/ql/src/test/results/clientpositive/skewjoinopt7.q.out index 939f34991b..9f490b4b7a 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt7.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt7.q.out @@ -108,12 +108,40 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: c filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key = '2') or (key = '8')) or (key = '3')) and key is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) @@ -125,47 +153,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + TableScan + Union + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce @@ -202,33 +214,15 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - TableScan - alias: c - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((not (((key = '2') or (key = '8')) or (key = '3'))) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git a/ql/src/test/results/clientpositive/skewjoinopt8.q.out b/ql/src/test/results/clientpositive/skewjoinopt8.q.out index ddb01f774c..4d6b31fd24 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt8.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt8.q.out @@ -106,12 +106,40 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: c filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = '3') or (key = '8')) and key is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) @@ -123,47 +151,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + TableScan + Union + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 888 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 1618 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce @@ -200,33 +212,15 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - TableScan - alias: c - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((not ((key = '3') or (key = '8'))) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out index 506d02f7f1..68d22a5445 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out @@ -46,8 +46,9 @@ POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' over POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3_n4 -Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[14][tables = [$hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_2, $hdt$_3]] in Stage 'Stage-4:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -63,16 +64,17 @@ POSTHOOK: Input: default@smb_bucket_2_n4 POSTHOOK: Input: default@smb_bucket_3_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: c + alias: a filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -84,7 +86,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: d + alias: b filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -110,11 +112,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: a + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: c filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -126,7 +159,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: d filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -137,31 +170,20 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -169,8 +191,11 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-5:MAPRED' is a cross product -Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[47][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[39][bigTable=?] in task 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Stage-9:MAPRED' is a cross product +Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Stage-10:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -186,27 +211,29 @@ POSTHOOK: Input: default@smb_bucket_2_n4 POSTHOOK: Input: default@smb_bucket_3_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 + Stage-13 is a root stage + Stage-9 depends on stages: Stage-13 + Stage-8 depends on stages: Stage-9, Stage-10 , consists of Stage-11, Stage-12, Stage-2 + Stage-11 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-11 + Stage-12 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-12 + Stage-2 + Stage-14 is a root stage + Stage-10 depends on stages: Stage-14 + Stage-0 depends on stages: Stage-6, Stage-7, Stage-2 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 $hdt$_1:b Fetch Operator limit: -1 - $hdt$_2:$hdt$_2:c - Fetch Operator - limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:a + $hdt$_1:b TableScan - alias: a + alias: b filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -218,10 +245,12 @@ STAGE PLANS: keys: 0 1 - 2 - $hdt$_1:b + + Stage: Stage-9 + Map Reduce + Map Operator Tree: TableScan - alias: b + alias: a filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -229,11 +258,142 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 1 - 2 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-11 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:$hdt$_2:c + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: $hdt$_2:$hdt$_2:c TableScan alias: c @@ -249,7 +409,7 @@ STAGE PLANS: 0 1 - Stage: Stage-5 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan @@ -268,26 +428,12 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 - 1 - 2 - Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -298,8 +444,11 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-5:MAPRED' is a cross product -Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[47][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[39][bigTable=?] in task 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Stage-9:MAPRED' is a cross product +Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Stage-10:MAPRED' is a cross product PREHOOK: query: select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY PREHOOK: Input: default@smb_bucket_1_n4 diff --git a/ql/src/test/results/clientpositive/stat_estimate_drill.q.out b/ql/src/test/results/clientpositive/stat_estimate_drill.q.out index 05c1033138..7c3b5a5e89 100644 --- a/ql/src/test/results/clientpositive/stat_estimate_drill.q.out +++ b/ql/src/test/results/clientpositive/stat_estimate_drill.q.out @@ -31,7 +31,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@t1 POSTHOOK: Lineage: t1.a SCRIPT [] -Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: insert into t3 select x1.a as a,x2.a as b,x3.a as c from t1 x1 diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.out index 4434edc69a..eca27ad8f4 100644 --- a/ql/src/test/results/clientpositive/subquery_notin_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -1070,12 +1070,13 @@ POSTHOOK: Input: default@t2_n85 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3, Stage-5 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2, Stage-6 Stage-4 is a root stage - Stage-5 depends on stages: Stage-4, Stage-6 - Stage-6 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-5 is a root stage + Stage-6 depends on stages: Stage-5, Stage-7 + Stage-7 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1128,6 +1129,32 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint), _col3 (type: bigint) TableScan Reduce Output Operator key expressions: _col2 (type: int) @@ -1139,13 +1166,11 @@ STAGE PLANS: Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col2 (type: int) + 1 _col2 (type: int) outputColumnNames: _col0, _col2, _col3, _col5 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: CASE WHEN ((_col2 = 0L)) THEN (true) WHEN (_col2 is null) THEN (true) WHEN (_col5 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (false) ELSE (true) END (type: boolean) Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE @@ -1161,7 +1186,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -1198,7 +1223,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -1236,7 +1261,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1268,7 +1293,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out index b80cf61633..b3da634beb 100644 --- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out @@ -82,24 +82,31 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Map 4 Map Operator Tree: @@ -229,25 +236,33 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Map 4 Map Operator Tree: @@ -357,8 +372,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Map 3 <- Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -388,29 +404,20 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - 3 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 4 - 3 Map 5 - Statistics: Num rows: 1584 Data size: 12672 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: w @@ -419,11 +426,34 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 2 + Statistics: Num rows: 1000 Data size: 86000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 5 + Statistics: Num rows: 1582 Data size: 12656 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Map 5 Map Operator Tree: @@ -440,7 +470,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -488,7 +518,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: hdfs://### HDFS PATH ### -5680 +0 PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM src1 x JOIN srcpart z ON (x.key = z.key) @@ -526,8 +556,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Map 3 <- Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -557,30 +588,21 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - 3 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 4 - 3 Map 5 - Statistics: Num rows: 1584 Data size: 12672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: w @@ -589,11 +611,36 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 2 + Statistics: Num rows: 1000 Data size: 86000 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 5 + Statistics: Num rows: 1582 Data size: 12656 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Map 5 Map Operator Tree: @@ -610,7 +657,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -658,7 +705,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: hdfs://### HDFS PATH ### -5680 +0 PREHOOK: query: SELECT 1 PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -760,24 +807,31 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 6 - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 6 + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Map 6 Map Operator Tree: @@ -821,24 +875,31 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 value (type: string) 1 value (type: string) - 2 value (type: string) + outputColumnNames: _col1 input vertices: 0 Map 7 - 2 Map 10 - Statistics: Num rows: 265 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Statistics: Num rows: 162 Data size: 14418 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + input vertices: + 1 Map 10 + Statistics: Num rows: 263 Data size: 2104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Reducer 3 Execution mode: vectorized @@ -1026,25 +1087,33 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) + outputColumnNames: _col0 input vertices: 0 Map 1 - 2 Map 6 - Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 158 Data size: 13588 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 key (type: string) + input vertices: + 1 Map 6 + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Map 6 Map Operator Tree: @@ -1088,25 +1157,33 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 value (type: string) 1 value (type: string) - 2 value (type: string) + outputColumnNames: _col1 input vertices: 0 Map 7 - 2 Map 10 - Statistics: Num rows: 265 Data size: 2120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 162 Data size: 14418 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + input vertices: + 1 Map 10 + Statistics: Num rows: 263 Data size: 2104 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Reducer 3 Execution mode: vectorized @@ -1288,52 +1365,66 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0, _col1 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 19 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 _col1 (type: string) - 1 value (type: string) - 2 value (type: string) + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col1 input vertices: - 1 Map 5 - 2 Map 6 - Statistics: Num rows: 196 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + 1 Map 4 + Statistics: Num rows: 4 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + input vertices: + 1 Map 6 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: y1 - filterExpr: ((value < 'zzzzzzzz') and (key < 'zzzzzzzz')) (type: boolean) + filterExpr: (key is not null and (value < 'zzzzzzzz')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 'zzzzzzzz') and (value < 'zzzzzzzz')) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value < 'zzzzzzzz') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map 5 Map Operator Tree: @@ -1499,54 +1590,70 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0, _col1 input vertices: 0 Map 1 - 2 Map 4 - Statistics: Num rows: 19 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 _col1 (type: string) - 1 value (type: string) - 2 value (type: string) + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col1 input vertices: - 1 Map 5 - 2 Map 6 - Statistics: Num rows: 196 Data size: 1568 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 4 + Statistics: Num rows: 4 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + input vertices: + 1 Map 6 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized Map 4 Map Operator Tree: TableScan alias: y1 - filterExpr: ((value < 'zzzzzzzz') and (key < 'zzzzzzzz')) (type: boolean) + filterExpr: (key is not null and (value < 'zzzzzzzz')) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key < 'zzzzzzzz') and (value < 'zzzzzzzz')) (type: boolean) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value < 'zzzzzzzz') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map 5 Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_outer_join6.q.out b/ql/src/test/results/clientpositive/vector_outer_join6.q.out index a209132e0a..bd938f631a 100644 --- a/ql/src/test/results/clientpositive/vector_outer_join6.q.out +++ b/ql/src/test/results/clientpositive/vector_outer_join6.q.out @@ -138,7 +138,7 @@ POSTHOOK: Input: default@tjoin1_n0 POSTHOOK: Input: default@tjoin2_n0 POSTHOOK: Input: default@tjoin3 #### A masked pattern was here #### -{"optimizedSQL":"SELECT `t`.`rnum` AS `tj1rnum`, `t0`.`rnum` AS `tj2rnum`, `t1`.`rnum` AS `rnumt3`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin3`) AS `t1` ON `t0`.`c1` = `t1`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-4":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-4"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}},"$hdt$_2:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}}}}},"Stage-4":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_16","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"1:_col0","_col4":"2:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"},{"":"Left Outer Join 1 to 2"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","One MapJoin Condition IS false"]},"outputColumnNames:":["_col0","_col2","_col4"],"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_17","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col4 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col2","_col2":"_col4"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2]"},"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_18","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_20"}}}}}} +{"optimizedSQL":"SELECT `t`.`rnum` AS `tj1rnum`, `t0`.`rnum` AS `tj2rnum`, `t1`.`rnum` AS `rnumt3`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin3`) AS `t1` ON `t0`.`c1` = `t1`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_20"}}}}}},"$hdt$_2:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col3 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_18"}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_22","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"1:_col0","_col3":"1:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col3"],"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_23","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2","_col4":"1:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col3 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:int","col 1:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col4"],"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_24","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col4 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col2","_col2":"_col4"},"outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2]"},"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_25","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_26"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_27"}}}}}} PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from (select tjoin1_n0.rnum tj1rnum, tjoin2_n0.rnum tj2rnum, tjoin2_n0.c1 tj2c1 from tjoin1_n0 left outer join tjoin2_n0 on tjoin1_n0.c1 = tjoin2_n0.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY @@ -173,7 +173,7 @@ POSTHOOK: Input: default@tjoin1_n0 POSTHOOK: Input: default@tjoin2_n0 POSTHOOK: Input: default@tjoin3 #### A masked pattern was here #### -{"optimizedSQL":"SELECT `t`.`rnum` AS `tj1rnum`, `t0`.`rnum` AS `rnumt3`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`\nLEFT JOIN (SELECT `c1`\nFROM `default`.`tjoin3`) AS `t1` ON `t0`.`c1` = `t1`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-4":{"DEPENDENT STAGES":"Stage-5"},"Stage-0":{"DEPENDENT STAGES":"Stage-4"}},"STAGE PLANS":{"Stage-5":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}},"$hdt$_2:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"c1 (type: int)","columnExprMap:":{"_col0":"c1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_13"}}}}}}}}},"Stage-4":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_16","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"1:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"},{"":"Left Outer Join 1 to 2"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)","2":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","One MapJoin Condition IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_17","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_18","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_20"}}}}}} +{"optimizedSQL":"SELECT `t`.`rnum` AS `tj1rnum`, `t0`.`rnum` AS `rnumt3`\nFROM (SELECT `rnum`, `c1`\nFROM `default`.`tjoin1_n0`) AS `t`\nLEFT JOIN (SELECT `rnum`, `c1`\nFROM `default`.`tjoin2_n0`) AS `t0` ON `t`.`c1` = `t0`.`c1`\nLEFT JOIN (SELECT `c1`\nFROM `default`.`tjoin3`) AS `t1` ON `t0`.`c1` = `t1`.`c1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:tjoin2_n0":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:tjoin2_n0":{"TableScan":{"alias:":"tjoin2_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin2_n0","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_20"}}}}}},"$hdt$_2:tjoin3":{"TableScan":{"alias:":"tjoin3","columns:":["c1"],"database:":"default","Statistics:":"Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin3","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"c1 (type: int)","columnExprMap:":{"_col0":"c1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col3 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_18"}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1_n0","columns:":["rnum","c1"],"database:":"default","Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","table:":"tjoin1_n0","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","columnExprMap:":{"_col0":"rnum","_col1":"c1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_22","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"1:_col0","_col3":"1:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 1:int"],"bigTableValueExpressions:":["col 0:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col3"],"Statistics:":"Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_23","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col3 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:int","col 1:int"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_24","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int)","columnExprMap:":{"_col0":"_col0","_col1":"_col2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1]"},"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_25","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_26"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_27"}}}}}} PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from (select tjoin1_n0.rnum tj1rnum, tjoin2_n0.rnum tj2rnum, tjoin2_n0.c1 tj2c1 from tjoin1_n0 left outer join tjoin2_n0 on tjoin1_n0.c1 = tjoin2_n0.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/vectorization_sum_if_when.q.out b/ql/src/test/results/clientpositive/vectorization_sum_if_when.q.out index 61090dc761..155d377997 100644 --- a/ql/src/test/results/clientpositive/vectorization_sum_if_when.q.out +++ b/ql/src/test/results/clientpositive/vectorization_sum_if_when.q.out @@ -31,7 +31,9 @@ POSTHOOK: query: create table vectorization_sum_if_when_b (x int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@vectorization_sum_if_when_b -Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: insert into table vectorization_sum_if_when_b select least(t1.x + t2.x + t3.x + t4.x, 1) from vectorization_sum_if_when_a t1, vectorization_sum_if_when_a t2, vectorization_sum_if_when_a t3, vectorization_sum_if_when_a t4 PREHOOK: type: QUERY PREHOOK: Input: default@vectorization_sum_if_when_a -- 2.14.3 (Apple Git-98)