diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q index a1d5249448..90fe79d5cb 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q @@ -26,6 +26,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; +set hive.auto.convert.join.noconditionaltask.size=10; -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q index e65344dd6d..5a84d5fe44 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q @@ -20,6 +20,8 @@ set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; set hive.auto.convert.sortmerge.join=true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; +set hive.auto.convert.join.noconditionaltask.size=10; + -- One of the subqueries contains a union, so it should not be converted to a sort-merge join. explain select count(*) from diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q index 11499f8eab..0d158af173 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q @@ -19,14 +19,13 @@ load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bu load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); set hive.auto.convert.join=true; - +set hive.auto.convert.join.noconditionaltask.size=10; explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; set hive.auto.convert.sortmerge.join=true; set hive.optimize.bucketmapjoin=true; set hive.optimize.bucketmapjoin.sortedmerge=true; - -- Since size is being used to find the big table, the order of the tables in the join does not matter -- The tables are only bucketed and not sorted, the join should not be converted -- Currenly, a join is only converted to a sort-merge join without a hint, automatic conversion to diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q index b512cc5c74..8c5ae3ed71 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q @@ -23,6 +23,7 @@ set hive.auto.convert.join=true; set hive.auto.convert.sortmerge.join=true; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.auto.convert.join.noconditionaltask.size=10; CREATE TABLE bucket_medium (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q index 1c868dcd15..cf29925f04 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q @@ -23,6 +23,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.join=true; -- A SMB join followed by a mutli-insert +set hive.auto.convert.join.noconditionaltask.size=200; explain from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -42,7 +43,6 @@ select * from dest1; select * from dest2; set hive.auto.convert.join.noconditionaltask=true; -set hive.auto.convert.join.noconditionaltask.size=200; set hive.mapjoin.hybridgrace.minwbsize=100; set hive.mapjoin.hybridgrace.minnumpartitions=2; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q index dd59c74fc0..e8386496e7 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q @@ -17,6 +17,7 @@ set hive.auto.convert.sortmerge.join=true; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask.size=10; -- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed explain diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q index 1480b15488..e655fe273f 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q @@ -17,6 +17,7 @@ set hive.auto.convert.sortmerge.join=true; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask.size=10; explain select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q index 12ab1fa1d1..4fcdc1d671 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q @@ -67,7 +67,34 @@ value, from stage_bucket_big where -file_tag='1'; +file_tag='1'; + +set hive.auto.convert.join.noconditionaltask.size=10; + +explain select + a.key , + a.value , + b.value , + 'day1' as day, + 1 as pri + from + ( + select + key, + value + from bucket_big where day='day1' + ) a + left outer join + ( + select + key, + value + from bucket_small + where pri between 1 and 2 + ) b + on + (a.key = b.key) +; select a.key , diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q index e77d937991..f108aae9d8 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q @@ -21,6 +21,7 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ; +set hive.auto.convert.join.noconditionaltask.size=10; -- Since the leftmost table is assumed as the big table, arrange the tables in the join accordingly explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q index 183f03335a..d7fb628918 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q @@ -21,6 +21,7 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; +set hive.auto.convert.join.noconditionaltask.size=100; -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q index 21f273a17b..ee9b91c333 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q @@ -23,6 +23,7 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ; +set hive.auto.convert.join.noconditionaltask.size=200; -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q index cf12331e13..5642592f28 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q @@ -27,6 +27,8 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ; +set hive.auto.convert.join.noconditionaltask.size=10; + -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q index 5ec4e26d4b..c505b02813 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q @@ -29,6 +29,7 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; +set hive.auto.convert.join.noconditionaltask.size=10; -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q index f95631f429..e33f07111e 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q @@ -20,6 +20,8 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join=true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; +set hive.auto.convert.join.noconditionaltask.size=10; + -- The join is being performed as part of sub-query. It should be converted to a sort-merge join explain select count(*) from ( diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q index 4a14587857..9f4791d605 100644 --- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q +++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q @@ -11,6 +11,7 @@ set hive.merge.mapredfiles=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ; set hive.auto.convert.sortmerge.join.to.mapjoin=true; +set hive.auto.convert.join.noconditionaltask.size=10; -- Create two bucketed and sorted tables CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) @@ -32,6 +33,7 @@ INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '2') SELECT * where key < 10; FROM src INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT * where key < 100; +set hive.convert.join.bucket.mapjoin.tez = true; -- Insert data into the bucketed table by selecting from another bucketed table -- This should be a map-only operation EXPLAIN diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q index ec0c2dc254..894ba81d97 100644 --- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q +++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q @@ -28,6 +28,7 @@ INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT key, key+1, value -- Insert data into the bucketed table by selecting from another bucketed table -- This should be a map-only operation, since the sort-order matches +set hive.auto.convert.join.noconditionaltask.size=800; EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key2, concat(a.value, b.value) diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q index 45635c1209..7ffa79addd 100644 --- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q +++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q @@ -11,6 +11,7 @@ set hive.merge.mapredfiles=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ; set hive.auto.convert.sortmerge.join.to.mapjoin=true; +set hive.auto.convert.join.noconditionaltask.size=800; -- Create two bucketed and sorted tables CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING) diff --git a/ql/src/test/queries/clientpositive/quotedid_smb.q b/ql/src/test/queries/clientpositive/quotedid_smb.q index 25d1f0eee7..62b3046406 100644 --- a/ql/src/test/queries/clientpositive/quotedid_smb.q +++ b/ql/src/test/queries/clientpositive/quotedid_smb.q @@ -28,6 +28,12 @@ set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.sortmerge.join.to.mapjoin=false; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; +set hive.auto.convert.join.noconditionaltask.size=1000; + +explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q` +from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q` +where a.`x+1` < '11' +; select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q` from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q` where a.`x+1` < '11' diff --git a/ql/src/test/queries/clientpositive/smb_cache.q b/ql/src/test/queries/clientpositive/smb_cache.q index e415e51053..94991985bf 100644 --- a/ql/src/test/queries/clientpositive/smb_cache.q +++ b/ql/src/test/queries/clientpositive/smb_cache.q @@ -1,4 +1,4 @@ -set hive.explain.user=true; +set hive.explain.user=false; create table bug_201_input_a ( userid int ) clustered by (userid) sorted by (userid) into 64 BUCKETS ; @@ -86,6 +86,7 @@ select distinct(userid) as userid from ( 162031843 ,141532840 ,154222699 ,109320121 ,155198842 )) as arr )a ) b; + explain select t1.userid, @@ -101,7 +102,7 @@ t1.userid, set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask.size=100 ; +set hive.auto.convert.join.noconditionaltask.size=5; set hive.auto.convert.sortmerge.join=true set hive.convert.join.bucket.mapjoin.tez = true; set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out index 22ac2a201a..3a90b05eb6 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out @@ -63,13 +63,20 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-5 + Stage-7 is a root stage , consists of Stage-8, Stage-9, Stage-1 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-2 depends on stages: Stage-1, Stage-5, Stage-6 + Stage-9 has a backup stage: Stage-1 + Stage-6 depends on stages: Stage-9 + Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 + Conditional Operator + + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:a @@ -180,6 +187,157 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0-subquery1:$hdt$_0-subquery1:a + Fetch Operator + limit: -1 + $hdt$_0-subquery2:$hdt$_0-subquery2:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0-subquery1:$hdt$_0-subquery1:a + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + $hdt$_0-subquery2:$hdt$_0-subquery2:a + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -230,36 +388,43 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 + Stage-7 has a backup stage: Stage-1 + Stage-4 depends on stages: Stage-7 + Stage-2 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 + Conditional Operator + + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:a + $hdt$_1:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:a + $hdt$_1:a TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: int) - mode: final + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -268,10 +433,11 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) + Group By Operator + keys: key (type: int) + mode: final outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -284,12 +450,23 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -304,6 +481,118 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:a + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out index 243a49b45f..81f5b18fc1 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out @@ -102,15 +102,23 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 + Stage-7 has a backup stage: Stage-1 + Stage-4 depends on stages: Stage-7 + Stage-2 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 + Conditional Operator + + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:a + $hdt$_1:b Fetch Operator limit: -1 Partition Description: @@ -121,101 +129,159 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big Alias -> Map Local Operator Tree: - $hdt$_0:a + $hdt$_1:b TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - Position of Big Table: 1 + Position of Big Table: 0 - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - Position of Big Table: 1 + Position of Big Table: 0 Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -366,8 +432,47 @@ STAGE PLANS: name: default.bucket_small name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [$hdt$_1:b] - /bucket_big/ds=2008-04-09 [$hdt$_1:b] + /bucket_small/ds=2008-04-08 [$hdt$_0:a] + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -398,40 +503,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@bucket_big -PREHOOK: Input: default@bucket_big@ds=2008-04-08 -PREHOOK: Input: default@bucket_big@ds=2008-04-09 -PREHOOK: Input: default@bucket_small -PREHOOK: Input: default@bucket_small@ds=2008-04-08 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucket_big -POSTHOOK: Input: default@bucket_big@ds=2008-04-08 -POSTHOOK: Input: default@bucket_big@ds=2008-04-09 -POSTHOOK: Input: default@bucket_small -POSTHOOK: Input: default@bucket_small@ds=2008-04-08 -#### A masked pattern was here #### -38 -PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key -PREHOOK: type: QUERY -POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-5 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -505,7 +577,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 1 - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -533,13 +605,24 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -692,35 +775,1162 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [$hdt$_1:b] /bucket_big/ds=2008-04-09 [$hdt$_1:b] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Path -> Partition: #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [$hdt$_1:b] + /bucket_big/ds=2008-04-09 [$hdt$_1:b] + /bucket_small/ds=2008-04-08 [$hdt$_0:a] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 +PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 + Stage-7 has a backup stage: Stage-1 + Stage-4 depends on stages: Stage-7 + Stage-2 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-6 + Conditional Operator + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:b + Fetch Operator + limit: -1 + Partition Description: + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Alias -> Map Local Operator Tree: + $hdt$_1:b + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [$hdt$_0:a] + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:a + Fetch Operator + limit: -1 + Partition Description: + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Alias -> Map Local Operator Tree: + $hdt$_0:a + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [$hdt$_1:b] + /bucket_big/ds=2008-04-09 [$hdt$_1:b] + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [$hdt$_1:b] + /bucket_big/ds=2008-04-09 [$hdt$_1:b] + /bucket_small/ds=2008-04-08 [$hdt$_0:a] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out index 3d0559a47c..1ed3dd0e47 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out @@ -134,21 +134,39 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-8:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-3 depends on stages: Stage-9 + Stage-13 is a root stage , consists of Stage-16, Stage-17, Stage-18, Stage-1 + Stage-16 has a backup stage: Stage-1 + Stage-10 depends on stages: Stage-16 + Stage-9 depends on stages: Stage-1, Stage-10, Stage-11, Stage-12 , consists of Stage-14, Stage-15, Stage-2 + Stage-14 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 + Stage-15 has a backup stage: Stage-2 + Stage-8 depends on stages: Stage-15 + Stage-2 + Stage-17 has a backup stage: Stage-1 + Stage-11 depends on stages: Stage-17 + Stage-18 has a backup stage: Stage-1 + Stage-12 depends on stages: Stage-18 + Stage-1 Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-13 + Conditional Operator + + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:a + $hdt$_0:c Fetch Operator limit: -1 Partition Description: @@ -159,23 +177,23 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -183,51 +201,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - $hdt$_2:b - Fetch Operator - limit: -1 - Partition Description: + name: default.bucket_big + name: default.bucket_big Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: - bucket_count 3 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_medium - numFiles 3 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_medium { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 170 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -235,24 +249,24 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 3 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_medium + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_medium { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_medium - name: default.bucket_medium - $hdt$_3:d + name: default.bucket_big + name: default.bucket_big + $hdt$_2:b Fetch Operator limit: -1 Partition Description: @@ -305,25 +319,25 @@ STAGE PLANS: name: default.bucket_medium name: default.bucket_medium Alias -> Map Local Operator Tree: - $hdt$_1:a + $hdt$_0:c TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Position of Big Table: 2 + Position of Big Table: 0 $hdt$_2:b TableScan alias: b @@ -342,35 +356,23 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Position of Big Table: 2 - $hdt$_3:d - TableScan - alias: d - Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE - GatherStats: false - Select Operator - Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE - HashTable Sink Operator - keys: - 0 - 1 - Position of Big Table: 0 + Position of Big Table: 0 - Stage: Stage-3 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -379,28 +381,26 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Position of Big Table: 2 + Position of Big Table: 0 Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Position of Big Table: 0 - Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns + columns.types + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -603,8 +603,234 @@ STAGE PLANS: name: default.bucket_small name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [$hdt$_0:c] - /bucket_big/ds=2008-04-09 [$hdt$_0:c] + /bucket_small/ds=2008-04-08 [$hdt$_1:a] + + Stage: Stage-9 + Conditional Operator + + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_3:d + Fetch Operator + limit: -1 + Partition Description: + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium + Alias -> Map Local Operator Tree: + $hdt$_3:d + TableScan + alias: d + Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Select Operator + Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + Position of Big Table: 0 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Position of Big Table: 0 + Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns + columns.types + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns + columns.types + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -635,13 +861,1417 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + GatherStats: false + HashTable Sink Operator + keys: + 0 + 1 + Position of Big Table: 1 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Select Operator + Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Position of Big Table: 1 + Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns + columns.types + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns + columns.types + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium + Truncated Path -> Alias: + /bucket_medium/ds=2008-04-08 [$hdt$_3:d] + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false + TableScan + alias: d + Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Select Operator + Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns + columns.types + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns + columns.types + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium + Truncated Path -> Alias: + /bucket_medium/ds=2008-04-08 [$hdt$_3:d] +#### A masked pattern was here #### + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-17 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:c + Fetch Operator + limit: -1 + Partition Description: + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + $hdt$_1:a + Fetch Operator + limit: -1 + Partition Description: + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Alias -> Map Local Operator Tree: + $hdt$_0:c + TableScan + alias: c + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Position of Big Table: 1 + $hdt$_1:a + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Position of Big Table: 1 + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Position of Big Table: 1 + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns + columns.types + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_medium/ds=2008-04-08 [$hdt$_2:b] + + Stage: Stage-18 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:a + Fetch Operator + limit: -1 + Partition Description: + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + $hdt$_2:b + Fetch Operator + limit: -1 + Partition Description: + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium + Alias -> Map Local Operator Tree: + $hdt$_1:a + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Position of Big Table: 2 + $hdt$_2:b + TableScan + alias: b + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Position of Big Table: 2 + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Position of Big Table: 2 + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns + columns.types + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [$hdt$_0:c] + /bucket_big/ds=2008-04-09 [$hdt$_0:c] + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false + TableScan + alias: c + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + tag: 2 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [$hdt$_0:c] + /bucket_big/ds=2008-04-09 [$hdt$_0:c] + /bucket_medium/ds=2008-04-08 [$hdt$_2:b] + /bucket_small/ds=2008-04-08 [$hdt$_1:a] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns + columns.types + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-8:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out index 36bfac3f4c..8fc43e468c 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_1.q.out @@ -112,11 +112,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -130,22 +129,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -194,7 +182,6 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -208,15 +195,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -232,13 +216,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -287,7 +269,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -405,77 +386,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -483,30 +437,68 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: bucket_count 4 bucket_field_name key @@ -549,61 +541,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -611,25 +572,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false @@ -698,77 +660,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -776,30 +711,68 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: bucket_count 4 bucket_field_name key @@ -842,61 +815,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 298 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -904,25 +846,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out index b8f10fec67..6b19524a4c 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out @@ -71,9 +71,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) - Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -88,27 +89,14 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 5 - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: a @@ -120,27 +108,14 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 5 - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: a @@ -157,16 +132,30 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -240,8 +229,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -252,19 +240,10 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: int) - mode: final + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: a @@ -272,18 +251,17 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) + Group By Operator + keys: key (type: int) + mode: final outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 0 Map 1 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -295,8 +273,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out index 37d97d2252..428a91c5a7 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_11.q.out @@ -110,8 +110,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -191,7 +191,7 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -205,29 +205,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -332,6 +317,30 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 + Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false @@ -400,8 +409,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -481,7 +490,7 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -495,29 +504,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -622,6 +616,30 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 + Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false @@ -690,8 +708,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -767,7 +785,7 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -777,29 +795,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: key (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -904,6 +907,30 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false @@ -972,8 +999,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1049,7 +1076,7 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -1059,32 +1086,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Estimated key counts: Map 1 => 1, Map 4 => 23 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - input vertices: - 0 Map 1 - 2 Map 4 - Position of Big Table: 1 - Statistics: Num rows: 244 Data size: 67852 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: key (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -1189,7 +1198,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -1311,6 +1320,32 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [c] /bucket_big/ds=2008-04-09 [c] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 1 + Statistics: Num rows: 244 Data size: 67852 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out index 655573650b..c14441a260 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_12.q.out @@ -134,7 +134,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key @@ -148,12 +148,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE) Reducer 4 <- Map 3 (XPROD_EDGE), Map 6 (XPROD_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -167,22 +166,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -231,7 +219,6 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -245,22 +232,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -309,7 +285,6 @@ STAGE PLANS: name: default.bucket_medium Truncated Path -> Alias: /bucket_medium/ds=2008-04-08 [b] - Map 3 Map Operator Tree: TableScan alias: c @@ -323,18 +298,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 111 Data size: 19719 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 Inner Join 1 to 2 - Estimated key counts: Map 1 => 1, Map 2 => 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - input vertices: - 0 Map 1 - 1 Map 2 Position of Big Table: 2 Statistics: Num rows: 244 Data size: 43381 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -344,13 +315,11 @@ STAGE PLANS: tag: 0 auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -399,7 +368,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -581,7 +549,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out index a6d73097e0..a19247264d 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out @@ -78,13 +78,21 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### - Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) -#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -94,15 +102,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -129,27 +135,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out index 2d03e8cb72..8e5554bf05 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out @@ -49,27 +49,32 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -81,23 +86,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -156,11 +144,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -169,14 +156,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Map 2 Map Operator Tree: TableScan alias: b @@ -185,14 +164,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 189 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Right Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 0 Map 1 Statistics: Num rows: 207 Data size: 796 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -204,7 +181,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out index ce41569f49..d6cb17b0a7 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out @@ -49,27 +49,32 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -81,23 +86,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -135,11 +123,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -148,14 +135,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Map 2 Map Operator Tree: TableScan alias: b @@ -164,14 +143,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Right Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 0 Map 1 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -183,7 +160,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs Reducer 3 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out index cb8564fd78..4796008f2b 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out @@ -166,6 +166,129 @@ POSTHOOK: Input: default@stage_bucket_big@file_tag=1 POSTHOOK: Output: default@bucket_big@day=day1/pri=1 POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).key SIMPLE [(stage_bucket_big)stage_bucket_big.FieldSchema(name:key, type:bigint, comment:null), ] POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).value SIMPLE [(stage_bucket_big)stage_bucket_big.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain select + a.key , + a.value , + b.value , + 'day1' as day, + 1 as pri + from + ( + select + key, + value + from bucket_big where day='day1' + ) a + left outer join + ( + select + key, + value + from bucket_small + where pri between 1 and 2 + ) b + on + (a.key = b.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select + a.key , + a.value , + b.value , + 'day1' as day, + 1 as pri + from + ( + select + key, + value + from bucket_big where day='day1' + ) a + left outer join + ( + select + key, + value + from bucket_small + where pri between 1 and 2 + ) b + on + (a.key = b.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: bucket_big + Statistics: Num rows: 5 Data size: 1006 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 1006 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 5 Data size: 1006 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: bucket_small + Statistics: Num rows: 236 Data size: 45636 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 236 Data size: 45636 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 236 Data size: 45636 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col3 (type: string), 'day1' (type: string), 1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.key , a.value , diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out index 90d362e981..ee70e6e1b7 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_2.q.out @@ -94,77 +94,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -172,30 +145,68 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: bucket_count 2 bucket_field_name key @@ -238,61 +249,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -300,25 +280,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false @@ -387,77 +368,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -465,30 +419,68 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: bucket_count 2 bucket_field_name key @@ -531,61 +523,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 594 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -593,25 +554,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out index 365f63c0ad..26439b90a5 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_3.q.out @@ -94,11 +94,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -112,22 +111,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -176,7 +164,6 @@ STAGE PLANS: name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -226,7 +213,6 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] /bucket_small/ds=2008-04-09 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -240,15 +226,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 61 Data size: 17310 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -264,13 +247,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -387,77 +368,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 58 Data size: 16300 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 61 Data size: 17310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -465,59 +419,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: bucket_count 2 bucket_field_name key @@ -560,31 +484,70 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 58 Data size: 16300 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 61 Data size: 17310 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -592,26 +555,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] Reducer 2 Execution mode: llap Needs Tagging: false @@ -680,77 +642,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 58 Data size: 16300 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 61 Data size: 17310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -758,59 +693,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: bucket_count 2 bucket_field_name key @@ -853,31 +758,70 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 58 Data size: 16300 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 56 Data size: 15737 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 61 Data size: 17310 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -885,26 +829,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] Reducer 2 Execution mode: llap Needs Tagging: false diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out index 8ee44b3493..43500fb53c 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_4.q.out @@ -110,11 +110,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -128,22 +127,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -192,7 +180,6 @@ STAGE PLANS: name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -242,7 +229,6 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] /bucket_small/ds=2008-04-09 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -256,15 +242,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 4 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -280,13 +263,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -403,77 +384,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 4 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -481,59 +435,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: bucket_count 4 bucket_field_name key @@ -576,31 +500,70 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -608,26 +571,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] Reducer 2 Execution mode: llap Needs Tagging: false @@ -696,77 +658,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 4 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -774,59 +709,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: bucket_count 4 bucket_field_name key @@ -869,31 +774,70 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 7432 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 28 Data size: 8175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: llap + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -901,26 +845,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] Reducer 2 Execution mode: llap Needs Tagging: false diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out index 83d5a968b7..a88396b69d 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out @@ -127,11 +127,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -145,22 +144,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -209,7 +197,6 @@ STAGE PLANS: name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -259,7 +246,6 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] /bucket_small/ds=2008-04-09 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -273,15 +259,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 4 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -297,13 +280,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -352,7 +333,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -472,77 +452,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 4 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -550,48 +503,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -599,78 +551,87 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -678,48 +639,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -727,26 +687,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false @@ -817,77 +777,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 4 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -895,48 +828,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -944,78 +876,87 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 54 Data size: 15252 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 1188 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 52 Data size: 14687 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 57 Data size: 16155 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1023,48 +964,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1072,26 +1012,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out index 0e0428481b..5ae16990e7 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out @@ -127,11 +127,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -145,22 +144,11 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -209,7 +197,6 @@ STAGE PLANS: name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -259,7 +246,6 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] /bucket_small/ds=2008-04-09 [a] - Map 2 Map Operator Tree: TableScan alias: b @@ -273,15 +259,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -297,13 +280,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint) auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -352,7 +333,6 @@ STAGE PLANS: name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -472,77 +452,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -550,48 +503,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -599,78 +551,87 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -678,48 +639,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -727,26 +687,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false @@ -817,77 +777,50 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -895,48 +828,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -944,78 +876,87 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Map 3 + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 116 Data size: 32232 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 596 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 111 Data size: 30842 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Statistics: Num rows: 122 Data size: 33926 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap - LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1023,48 +964,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1072,26 +1012,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] Reducer 2 Execution mode: llap Needs Tagging: false diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out index 8bd3d126c1..1d1537cda6 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out @@ -57,13 +57,23 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -73,14 +83,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -92,26 +100,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -172,13 +160,23 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -188,15 +186,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -211,26 +207,6 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -307,7 +283,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### @@ -315,6 +290,17 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -324,15 +310,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) @@ -345,26 +329,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -474,14 +438,25 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -491,15 +466,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -513,15 +486,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -533,13 +499,38 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -549,28 +540,35 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Reducer 3 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 3 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -659,7 +657,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -675,14 +672,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -694,26 +700,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -788,13 +774,23 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -804,14 +800,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -823,26 +817,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -941,7 +915,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -957,14 +930,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -976,26 +958,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1082,7 +1044,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -1098,27 +1059,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Map 3 Map Operator Tree: TableScan alias: a @@ -1130,13 +1070,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1201,8 +1151,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1217,27 +1167,14 @@ STAGE PLANS: expressions: (key + 1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: a @@ -1259,6 +1196,25 @@ STAGE PLANS: Reducer 2 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -1316,7 +1272,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -1332,14 +1287,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1351,26 +1315,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1429,7 +1373,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -1445,14 +1388,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1464,26 +1416,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1552,7 +1484,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -1568,30 +1499,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Map 3 Map Operator Tree: TableScan alias: a @@ -1603,14 +1510,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Map 4 Map Operator Tree: TableScan alias: a @@ -1622,13 +1521,25 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1711,13 +1622,23 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1727,14 +1648,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1746,26 +1665,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1838,13 +1737,23 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1854,14 +1763,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1873,26 +1780,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1953,13 +1840,23 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1969,15 +1866,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1992,26 +1887,6 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2088,7 +1963,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### @@ -2096,6 +1970,17 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2105,15 +1990,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) @@ -2126,26 +2009,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2255,14 +2118,25 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2272,15 +2146,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2294,15 +2166,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -2314,13 +2179,38 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2330,28 +2220,35 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Reducer 3 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 3 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2440,7 +2337,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2456,14 +2352,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2475,26 +2380,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2569,13 +2454,23 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2585,14 +2480,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2604,26 +2497,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2722,7 +2595,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2738,14 +2610,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2757,26 +2638,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2863,7 +2724,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2879,14 +2739,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2898,26 +2767,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 8) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2978,7 +2827,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2994,14 +2842,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -3013,26 +2870,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3091,7 +2928,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -3107,14 +2943,23 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -3126,26 +2971,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3214,7 +3039,6 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -3230,30 +3054,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Map 3 Map Operator Tree: TableScan alias: a @@ -3265,14 +3065,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Map 4 Map Operator Tree: TableScan alias: a @@ -3284,13 +3076,25 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3373,13 +3177,23 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -3389,14 +3203,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -3408,26 +3220,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out index b907c2dbd8..67d4db3925 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out @@ -99,51 +99,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - input vertices: - 0 Map 1 Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) @@ -156,8 +144,7 @@ STAGE PLANS: Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -279,51 +266,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 20 Data size: 3900 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 20 Data size: 3900 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - input vertices: - 0 Map 1 Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) @@ -336,8 +311,7 @@ STAGE PLANS: Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -483,51 +457,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 168 Data size: 31740 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 160 Data size: 30228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 160 Data size: 30228 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 168 Data size: 31740 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 160 Data size: 30228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 160 Data size: 30228 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - input vertices: - 0 Map 1 Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) @@ -540,8 +502,7 @@ STAGE PLANS: Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -693,51 +654,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + alias: test_table2 + Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + alias: test_table1 + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) @@ -750,8 +699,7 @@ STAGE PLANS: Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -885,51 +833,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + alias: test_table2 + Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + alias: test_table1 + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) @@ -942,8 +878,7 @@ STAGE PLANS: Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1077,51 +1012,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + alias: test_table2 + Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + alias: test_table1 + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string) @@ -1134,8 +1057,7 @@ STAGE PLANS: Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out index f5f5f91e82..1d2a889195 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out @@ -77,51 +77,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col6 - input vertices: - 0 Map 1 Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) @@ -134,8 +122,7 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -264,51 +251,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col6 - input vertices: - 0 Map 1 Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) @@ -321,8 +296,7 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -451,51 +425,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col6 - input vertices: - 0 Map 1 Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col6) (type: string) @@ -508,8 +470,7 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -573,51 +534,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col6 - input vertices: - 0 Map 1 Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col6) (type: string) @@ -630,8 +579,7 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -701,51 +649,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col6 - input vertices: - 0 Map 1 Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) @@ -758,8 +694,7 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -906,51 +841,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col6 - input vertices: - 0 Map 1 Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) @@ -963,8 +886,7 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1121,51 +1043,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col6 - input vertices: - 0 Map 1 Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) @@ -1178,8 +1088,7 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out index 7b380562ac..2e083b81ae 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out @@ -77,51 +77,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - input vertices: - 0 Map 1 Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) @@ -134,8 +122,7 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -261,51 +248,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + alias: test_table2 + Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + alias: test_table1 + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) @@ -318,8 +293,7 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator @@ -451,51 +425,39 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + alias: test_table2 + Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 2 + Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + alias: test_table1 + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean) - Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) @@ -508,8 +470,7 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap - LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out b/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out index 8e850f50ce..97721d0c36 100644 --- a/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out +++ b/ql/src/test/results/clientpositive/llap/quotedid_smb.q.out @@ -42,6 +42,69 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@src_b2 POSTHOOK: Lineage: src_b2.!@#$%^&*()_q SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_b2.x+1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q` +from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q` +where a.`x+1` < '11' +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q` +from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q` +where a.`x+1` < '11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: !@#$%^&*()_q is not null (type: boolean) + Statistics: Num rows: 475 Data size: 166409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: x+1 (type: string), !@#$%^&*()_q (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 475 Data size: 166409 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (!@#$%^&*()_q is not null and (x+1 < '11')) (type: boolean) + Statistics: Num rows: 141 Data size: 49397 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: x+1 (type: string), !@#$%^&*()_q (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 141 Data size: 49397 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 522 Data size: 183049 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 522 Data size: 183049 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q` from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q` where a.`x+1` < '11' @@ -56,15 +119,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src_b POSTHOOK: Input: default@src_b2 #### A masked pattern was here #### -10 val_10 10 val_10 -100 val_100 100 val_100 -100 val_100 100 val_100 -100 val_100 100 val_100 -100 val_100 100 val_100 -104 val_104 104 val_104 -104 val_104 104 val_104 -104 val_104 104 val_104 -104 val_104 104 val_104 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 @@ -79,3 +133,12 @@ POSTHOOK: Input: default@src_b2 103 val_103 103 val_103 103 val_103 103 val_103 105 val_105 105 val_105 +10 val_10 10 val_10 +100 val_100 100 val_100 +100 val_100 100 val_100 +100 val_100 100 val_100 +100 val_100 100 val_100 +104 val_104 104 val_104 +104 val_104 104 val_104 +104 val_104 104 val_104 +104 val_104 104 val_104 diff --git a/ql/src/test/results/clientpositive/llap/smb_cache.q.out b/ql/src/test/results/clientpositive/llap/smb_cache.q.out index 60d4ff0ba0..e3a4c95624 100644 --- a/ql/src/test/results/clientpositive/llap/smb_cache.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_cache.q.out @@ -200,37 +200,80 @@ t1.userid, from bug_201_input_b as t1 join bug_201_input_a as fa on (t1.userid = fa.userid) POSTHOOK: type: QUERY -Plan optimized by CBO. +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 130 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: userid is not null (type: boolean) + Statistics: Num rows: 124 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 124 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 124 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: fa + Statistics: Num rows: 527 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: userid is not null (type: boolean) + Statistics: Num rows: 501 Data size: 1908 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 501 Data size: 1908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 501 Data size: 1908 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 551 Data size: 2098 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 551 Data size: 2098 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 llap - File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=551 width=3) - Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=124 width=3) - Output:["_col0"] - Filter Operator [FIL_13] (rows=124 width=3) - predicate:userid is not null - TableScan [TS_0] (rows=130 width=3) - default@bug_201_input_b,t1,Tbl:COMPLETE,Col:NONE,Output:["userid"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=501 width=3) - Output:["_col0"] - Filter Operator [FIL_14] (rows=501 width=3) - predicate:userid is not null - TableScan [TS_3] (rows=527 width=3) - default@bug_201_input_a,fa,Tbl:COMPLETE,Col:NONE,Output:["userid"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select t1.userid, @@ -280,37 +323,80 @@ t1.userid, from bug_201_input_b as t1 join bug_201_input_a as fa on (t1.userid = fa.userid) POSTHOOK: type: QUERY -Plan optimized by CBO. +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 130 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: userid is not null (type: boolean) + Statistics: Num rows: 124 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 124 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 124 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: fa + Statistics: Num rows: 527 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: userid is not null (type: boolean) + Statistics: Num rows: 501 Data size: 1908 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: userid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 501 Data size: 1908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 501 Data size: 1908 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 551 Data size: 2098 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 551 Data size: 2098 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 llap - File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=551 width=3) - Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=124 width=3) - Output:["_col0"] - Filter Operator [FIL_13] (rows=124 width=3) - predicate:userid is not null - TableScan [TS_0] (rows=130 width=3) - default@bug_201_input_b,t1,Tbl:COMPLETE,Col:NONE,Output:["userid"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=501 width=3) - Output:["_col0"] - Filter Operator [FIL_14] (rows=501 width=3) - predicate:userid is not null - TableScan [TS_3] (rows=527 width=3) - default@bug_201_input_a,fa,Tbl:COMPLETE,Col:NONE,Output:["userid"] + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select t1.userid, diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out index e6038b857d..86c10aa9e4 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out @@ -520,96 +520,10 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [$hdt$_1:b] - Stage: Stage-1 Spark Edges: @@ -630,14 +544,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -653,13 +565,6 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out index ff9a0f4fa4..3db099a135 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out @@ -134,19 +134,22 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -163,14 +166,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Position of Big Table: 2 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -225,7 +228,7 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [$hdt$_1:a] - Map 2 + Map 5 Map Operator Tree: TableScan alias: b @@ -239,14 +242,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Position of Big Table: 2 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -301,21 +304,28 @@ STAGE PLANS: name: default.bucket_medium Truncated Path -> Alias: /bucket_medium/ds=2008-04-08 [$hdt$_2:b] - Map 5 + Map 6 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + tag: 2 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -327,23 +337,23 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 3 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_medium - numFiles 3 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_medium { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 170 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -351,93 +361,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 3 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_medium + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_medium { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_medium - name: default.bucket_medium - Truncated Path -> Alias: - /bucket_medium/ds=2008-04-08 [$hdt$_3:d] - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - input vertices: - 0 Map 1 - 1 Map 2 - Position of Big Table: 2 - Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - input vertices: - 1 Map 5 - Position of Big Table: 0 - Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: bucket_count 4 bucket_field_name key @@ -480,31 +427,51 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [$hdt$_0:c] + /bucket_big/ds=2008-04-09 [$hdt$_0:c] + Map 7 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Select Operator + Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 170 Basic stats: PARTIAL Column stats: NONE + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: - bucket_count 4 + bucket_count 3 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_medium + numFiles 3 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_medium { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 170 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -512,26 +479,65 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 3 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_medium partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_medium { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_medium + name: default.bucket_medium Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [$hdt$_0:c] - /bucket_big/ds=2008-04-09 [$hdt$_0:c] + /bucket_medium/ds=2008-04-08 [$hdt$_3:d] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 255 Data size: 25572 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 255 Data size: 69177 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -569,7 +575,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Work 'Reducer 3' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out index 8c0d506b26..5c73ddbf1a 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out @@ -41,31 +41,10 @@ POSTHOOK: query: explain select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -81,14 +60,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -99,8 +76,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -150,38 +125,17 @@ POSTHOOK: query: explain select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: b @@ -190,14 +144,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 189 Data size: 1891 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Right Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 0 Map 1 Statistics: Num rows: 207 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -208,9 +160,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out index b005bda331..fbbe01dbd8 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out @@ -41,31 +41,10 @@ POSTHOOK: query: explain select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -81,14 +60,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -99,8 +76,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -129,38 +104,17 @@ POSTHOOK: query: explain select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: b @@ -169,14 +123,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Right Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 0 Map 1 Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -187,9 +139,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out index cb8564fd78..a92cd2d2c8 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out @@ -166,6 +166,98 @@ POSTHOOK: Input: default@stage_bucket_big@file_tag=1 POSTHOOK: Output: default@bucket_big@day=day1/pri=1 POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).key SIMPLE [(stage_bucket_big)stage_bucket_big.FieldSchema(name:key, type:bigint, comment:null), ] POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).value SIMPLE [(stage_bucket_big)stage_bucket_big.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain select + a.key , + a.value , + b.value , + 'day1' as day, + 1 as pri + from + ( + select + key, + value + from bucket_big where day='day1' + ) a + left outer join + ( + select + key, + value + from bucket_small + where pri between 1 and 2 + ) b + on + (a.key = b.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select + a.key , + a.value , + b.value , + 'day1' as day, + 1 as pri + from + ( + select + key, + value + from bucket_big where day='day1' + ) a + left outer join + ( + select + key, + value + from bucket_small + where pri between 1 and 2 + ) b + on + (a.key = b.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: bucket_big + Statistics: Num rows: 5 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 259 Data size: 2468 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col3 (type: string), 'day1' (type: string), 1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 259 Data size: 2468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 259 Data size: 2468 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.key , a.value , diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out index 025d0d29c5..6c25da4b8c 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out @@ -294,96 +294,10 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - bucket_count 4 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [$hdt$_1:b] - Stage: Stage-1 Spark Edges: @@ -404,14 +318,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 59 Data size: 6050 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -427,13 +339,6 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out index 3ad950a107..624a730a9f 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out @@ -402,146 +402,10 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [$hdt$_1:b] - /bucket_small/ds=2008-04-09 [$hdt$_1:b] - Stage: Stage-1 Spark Edges: @@ -562,14 +426,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -585,13 +447,6 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out index 60437ec56d..44f792a154 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out @@ -418,146 +418,10 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - bucket_count 4 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - bucket_count 4 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [$hdt$_1:b] - /bucket_small/ds=2008-04-09 [$hdt$_1:b] - Stage: Stage-1 Spark Edges: @@ -578,14 +442,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -601,13 +463,6 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out index 16ecabe05d..73525bcba1 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out @@ -539,146 +539,10 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - bucket_count 4 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - bucket_count 4 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [$hdt$_1:b] - /bucket_small/ds=2008-04-09 [$hdt$_1:b] - Stage: Stage-1 Spark Edges: @@ -699,14 +563,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 59 Data size: 6050 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -722,13 +584,6 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out index e180471dcb..abd6e653b4 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out @@ -539,146 +539,10 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [$hdt$_1:b] - /bucket_small/ds=2008-04-09 [$hdt$_1:b] - Stage: Stage-1 Spark Edges: @@ -699,14 +563,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - input vertices: - 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -722,13 +584,6 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out index 4d0476f9ee..033ee04e3c 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out @@ -387,18 +387,19 @@ join on src1.key = src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -430,9 +431,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 4 - Local Work: - Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -440,52 +439,34 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 2 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -493,27 +474,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -1018,16 +984,18 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -1039,20 +1007,12 @@ STAGE PLANS: expressions: (key + 1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -1064,28 +1024,31 @@ STAGE PLANS: expressions: (key + 1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -1533,34 +1496,10 @@ select count(*) from ( ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -1579,14 +1518,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1597,8 +1534,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -1650,34 +1585,10 @@ select key, count(*) from group by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -1696,15 +1607,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 3 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1718,8 +1627,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -1787,34 +1694,10 @@ select count(*) from ) subq2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -1834,15 +1717,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 4 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) @@ -1854,8 +1735,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -1955,43 +1834,19 @@ join on src1.key = src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 + Stage: Stage-1 Spark Edges: - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -2003,15 +1858,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - input vertices: - 1 Map 6 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2025,11 +1878,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 5 - Local Work: - Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2037,79 +1886,34 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 2 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2117,27 +1921,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2204,34 +1993,10 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -2250,14 +2015,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2268,8 +2031,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -2335,34 +2096,10 @@ select count(*) from on subq2.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -2381,14 +2118,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2399,8 +2134,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -2490,34 +2223,10 @@ select count(*) from on subq2.key = subq4.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -2536,14 +2245,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2554,8 +2261,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -2633,34 +2338,10 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -2679,14 +2360,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2697,8 +2376,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -2750,34 +2427,10 @@ select count(*) from join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -2796,14 +2449,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2814,8 +2465,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -2865,34 +2514,10 @@ select count(*) from tbl1 a on a.key = subq1.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -2911,14 +2536,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -2929,8 +2552,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -2990,54 +2611,10 @@ select count(*) from on (subq1.key = subq3.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -3056,7 +2633,7 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 Inner Join 0 to 2 @@ -3064,9 +2641,6 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - input vertices: - 1 Map 3 - 2 Map 4 Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -3077,8 +2651,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -3152,34 +2724,10 @@ join tbl2 b on subq2.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: @@ -3198,14 +2746,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - input vertices: - 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -3216,8 +2762,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out index 814553d81a..3e8ea6f113 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out @@ -89,14 +89,15 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -111,40 +112,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - input vertices: - 0 Map 1 Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) @@ -156,9 +130,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -266,14 +238,15 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds is not null and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -288,40 +261,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - input vertices: - 0 Map 1 Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) @@ -333,9 +279,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -467,14 +411,15 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds is not null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -489,40 +434,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 168 Data size: 1472 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 168 Data size: 1472 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 168 Data size: 1472 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - input vertices: - 0 Map 1 Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) @@ -534,9 +452,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -674,14 +590,15 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -696,40 +613,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) @@ -741,9 +631,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -863,14 +751,15 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -885,40 +774,13 @@ STAGE PLANS: expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), concat(value, value) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) @@ -930,9 +792,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -1052,14 +912,15 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1074,40 +935,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string) @@ -1119,9 +953,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/quotedid_smb.q.out b/ql/src/test/results/clientpositive/spark/quotedid_smb.q.out index 7b8777f9d6..8b37d2a03d 100644 --- a/ql/src/test/results/clientpositive/spark/quotedid_smb.q.out +++ b/ql/src/test/results/clientpositive/spark/quotedid_smb.q.out @@ -42,6 +42,57 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@src_b2 POSTHOOK: Lineage: src_b2.!@#$%^&*()_q SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_b2.x+1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q` +from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q` +where a.`x+1` < '11' +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q` +from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q` +where a.`x+1` < '11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (!@#$%^&*()_q is not null and (x+1 < '11')) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: x+1 (type: string), !@#$%^&*()_q (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q` from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q` where a.`x+1` < '11'