diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java index e79d1005ec..cddcf163a8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java @@ -128,10 +128,10 @@ for (Operator parent : start.getParentOperators()) { if (onlyIncludeIndex >= 0) { if (onlyIncludeIndex == i) { - findOperatorsUpstream(parent, clazz, found); + findOperatorsUpstreamJoinAccounted(parent, clazz, found); } } else { - findOperatorsUpstream(parent, clazz, found); + findOperatorsUpstreamJoinAccounted(parent, clazz, found); } i++; } diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q index aeb244ab8a..c67acd4231 100644 --- a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q +++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q @@ -28,86 +28,185 @@ CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY ( insert overwrite table tab partition (ds='2008-04-08') select key,value from srcbucket_mapjoin; +analyze table srcbucket_mapjoin compute statistics for columns; +analyze table srcbucket_mapjoin_part compute statistics for columns; +analyze table tab compute statistics for columns; +analyze table tab_part compute statistics for columns; + +set hive.convert.join.bucket.mapjoin.tez = false; +explain +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key; +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key; + set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, a.value, b.value from tab a join tab_part b on a.key = b.key; +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key; + +set hive.auto.convert.join.noconditionaltask.size=900; +set hive.convert.join.bucket.mapjoin.tez = false; explain select count(*) from -(select distinct key, value from tab_part) a join tab b on a.key = b.key; - +(select distinct key from tab_part) a join tab b on a.key = b.key; select count(*) from -(select distinct key, value from tab_part) a join tab b on a.key = b.key; +(select distinct key from tab_part) a join tab b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; +explain +select count(*) +from +(select distinct key from tab_part) a join tab b on a.key = b.key; +select count(*) +from +(select distinct key from tab_part) a join tab b on a.key = b.key; + + +set hive.convert.join.bucket.mapjoin.tez = false; explain select count(*) from (select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c join tab_part d on c.key = d.key; +select count(*) +from +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c +join +tab_part d on c.key = d.key; +set hive.convert.join.bucket.mapjoin.tez = true; +explain +select count(*) +from +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c +join +tab_part d on c.key = d.key; select count(*) from (select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c join tab_part d on c.key = d.key; +set hive.convert.join.bucket.mapjoin.tez = false; explain select count(*) from tab_part d join (select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key; - select count(*) from tab_part d join (select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key; +set hive.convert.join.bucket.mapjoin.tez = true; +explain +select count(*) +from +tab_part d +join +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key; +select count(*) +from +tab_part d +join +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key; -- one side is really bucketed. srcbucket_mapjoin is not really a bucketed table. -- In this case the sub-query is chosen as the big table. +set hive.convert.join.bucket.mapjoin.tez = false; +set hive.auto.convert.join.noconditionaltask.size=1000; explain select a.k1, a.v1, b.value from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a join tab b on a.k1 = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; +explain +select a.k1, a.v1, b.value +from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a + join tab b on a.k1 = b.key; +set hive.convert.join.bucket.mapjoin.tez = false; explain select a.k1, a.v1, b.value from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a join tab b on a.k1 = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; +explain +select a.k1, a.v1, b.value +from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a + join tab b on a.k1 = b.key; +set hive.convert.join.bucket.mapjoin.tez = false; explain select a.k1, a.v1, b.value from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a join tab_part b on a.k1 = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; +explain +select a.k1, a.v1, b.value +from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a + join tab_part b on a.k1 = b.key; -- multi-way join +set hive.convert.join.bucket.mapjoin.tez = false; +set hive.auto.convert.join.noconditionaltask.size=20000; +explain +select a.key, a.value, b.value +from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, a.value, b.value from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key; +set hive.convert.join.bucket.mapjoin.tez = false; +explain +select a.key, a.value, c.value +from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, a.value, c.value from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key; -- in this case sub-query is the small table +set hive.convert.join.bucket.mapjoin.tez = false; +set hive.auto.convert.join.noconditionaltask.size=900; explain select a.key, a.value, b.value from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a join tab_part b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; +explain +select a.key, a.value, b.value +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a + join tab_part b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = false; set hive.map.aggr=false; explain select a.key, a.value, b.value from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a join tab_part b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; +explain +select a.key, a.value, b.value +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a + join tab_part b on a.key = b.key; --- join on non-bucketed column results in broadcast join. +-- join on non-bucketed column results in shuffle join. +set hive.convert.join.bucket.mapjoin.tez = false; +explain +select a.key, a.value, b.value +from tab a join tab_part b on a.value = b.value; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, a.value, b.value from tab a join tab_part b on a.value = b.value; @@ -116,36 +215,30 @@ CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORE insert overwrite table tab1 select key,value from srcbucket_mapjoin; +set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.convert.join.bucket.mapjoin.tez = false; +explain +select a.key, a.value, b.value +from tab1 a join tab_part b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, a.value, b.value from tab1 a join tab_part b on a.key = b.key; +-- No map joins should be created. +set hive.convert.join.bucket.mapjoin.tez = false; +set hive.auto.convert.join.noconditionaltask.size=1500; +explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value; -set hive.auto.convert.join.noconditionaltask.size=50000; +set hive.convert.join.bucket.mapjoin.tez = false; +-- This wont have any effect as the column ds is partition column which is not bucketed. explain select a.key, a.value, b.value from tab a join tab_part b on a.key = b.key and a.ds = b.ds; - -set hive.auto.convert.join.noconditionaltask.size=10000; -set hive.mapjoin.hybridgrace.hashtable = false; -insert overwrite table tab partition (ds='2008-04-08') -select key,value from srcbucket_mapjoin where key = 411; - -explain -select count(*) -from tab_part a join tab b on a.key = b.key; - -select count(*) -from tab_part a join tab b on a.key = b.key; - -set hive.mapjoin.hybridgrace.hashtable = false; -insert overwrite table tab partition (ds='2008-04-08') -select key,value from srcbucket_mapjoin where key = 411; - +set hive.convert.join.bucket.mapjoin.tez = true; explain -select count(*) -from tab_part a join tab b on a.key = b.key; +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key and a.ds = b.ds; -select count(*) -from tab_part a join tab b on a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q index 37989ecc9d..45feec2b63 100644 --- a/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q +++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez2.q @@ -28,27 +28,82 @@ CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY ( insert overwrite table tab partition (ds='2008-04-08') select key,value from srcbucket_mapjoin; -set hive.convert.join.bucket.mapjoin.tez = true; +analyze table srcbucket_mapjoin compute statistics for columns; +analyze table srcbucket_mapjoin_part compute statistics for columns; +analyze table tab compute statistics for columns; +analyze table tab_part compute statistics for columns; +set hive.auto.convert.join.noconditionaltask.size=1500; +set hive.convert.join.bucket.mapjoin.tez = false; +explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value; CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; insert overwrite table tab1 select key,value from srcbucket_mapjoin; +analyze table tab1 compute statistics for columns; +-- A negative test as src is not bucketed. +set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.convert.join.bucket.mapjoin.tez = false; +explain +select a.key, a.value, b.value +from tab1 a join src b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, a.value, b.value from tab1 a join src b on a.key = b.key; +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.convert.join.bucket.mapjoin.tez = false; +explain +select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = false; +explain +select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = false; +explain +select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key; +set hive.auto.convert.join.noconditionaltask.size=300; +set hive.convert.join.bucket.mapjoin.tez = false; +explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key; +set hive.convert.join.bucket.mapjoin.tez = true; explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key; +set hive.convert.join.bucket.mapjoin.tez = false; explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value; +set hive.convert.join.bucket.mapjoin.tez = true; +explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value; + + + +--multi key +CREATE TABLE tab_part1 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key, value) INTO 4 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab_part1 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; +analyze table tab_part1 compute statistics for columns; + +set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.convert.join.bucket.mapjoin.tez = false; +explain +select count(*) +from +(select distinct key,value from tab_part) a join tab b on a.key = b.key and a.value = b.value; +set hive.convert.join.bucket.mapjoin.tez = true; +explain +select count(*) +from +(select distinct key,value from tab_part) a join tab b on a.key = b.key and a.value = b.value; diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out index f76356658d..0fecf1a8c7 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out @@ -108,6 +108,62 @@ POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: Output: default@tab@ds=2008-04-08 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: analyze table srcbucket_mapjoin compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@srcbucket_mapjoin +PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcbucket_mapjoin compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: analyze table srcbucket_mapjoin_part compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcbucket_mapjoin_part compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: analyze table tab compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Output: default@tab +PREHOOK: Output: default@tab@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table tab compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Output: default@tab +POSTHOOK: Output: default@tab@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: analyze table tab_part compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@tab_part +PREHOOK: Input: default@tab_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part +PREHOOK: Output: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table tab_part compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part +POSTHOOK: Output: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### PREHOOK: query: explain select a.key, a.value, b.value from tab a join tab_part b on a.key = b.key @@ -132,19 +188,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -152,19 +208,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -178,14 +234,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -197,15 +253,1089 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Input: default@tab_part +PREHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +2 val_2 val_2 +4 val_4 val_4 +8 val_8 val_8 +11 val_11 val_11 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +17 val_17 val_17 +19 val_19 val_19 +20 val_20 val_20 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +28 val_28 val_28 +33 val_33 val_33 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +42 val_42 val_42 +42 val_42 val_42 +42 val_42 val_42 +42 val_42 val_42 +44 val_44 val_44 +51 val_51 val_51 +51 val_51 val_51 +51 val_51 val_51 +51 val_51 val_51 +53 val_53 val_53 +57 val_57 val_57 +64 val_64 val_64 +66 val_66 val_66 +77 val_77 val_77 +80 val_80 val_80 +82 val_82 val_82 +84 val_84 val_84 +84 val_84 val_84 +84 val_84 val_84 +84 val_84 val_84 +86 val_86 val_86 +95 val_95 val_95 +95 val_95 val_95 +95 val_95 val_95 +95 val_95 val_95 +97 val_97 val_97 +97 val_97 val_97 +97 val_97 val_97 +97 val_97 val_97 +103 val_103 val_103 +103 val_103 val_103 +103 val_103 val_103 +103 val_103 val_103 +105 val_105 val_105 +114 val_114 val_114 +116 val_116 val_116 +118 val_118 val_118 +118 val_118 val_118 +118 val_118 val_118 +118 val_118 val_118 +125 val_125 val_125 +125 val_125 val_125 +125 val_125 val_125 +125 val_125 val_125 +129 val_129 val_129 +129 val_129 val_129 +129 val_129 val_129 +129 val_129 val_129 +134 val_134 val_134 +134 val_134 val_134 +134 val_134 val_134 +134 val_134 val_134 +136 val_136 val_136 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +143 val_143 val_143 +145 val_145 val_145 +149 val_149 val_149 +149 val_149 val_149 +149 val_149 val_149 +149 val_149 val_149 +150 val_150 val_150 +152 val_152 val_152 +152 val_152 val_152 +152 val_152 val_152 +152 val_152 val_152 +156 val_156 val_156 +158 val_158 val_158 +163 val_163 val_163 +165 val_165 val_165 +165 val_165 val_165 +165 val_165 val_165 +165 val_165 val_165 +167 val_167 val_167 +167 val_167 val_167 +167 val_167 val_167 +167 val_167 val_167 +167 val_167 val_167 +167 val_167 val_167 +167 val_167 val_167 +167 val_167 val_167 +167 val_167 val_167 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +170 val_170 val_170 +172 val_172 val_172 +172 val_172 val_172 +172 val_172 val_172 +172 val_172 val_172 +174 val_174 val_174 +174 val_174 val_174 +174 val_174 val_174 +174 val_174 val_174 +176 val_176 val_176 +176 val_176 val_176 +176 val_176 val_176 +176 val_176 val_176 +178 val_178 val_178 +181 val_181 val_181 +183 val_183 val_183 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +189 val_189 val_189 +190 val_190 val_190 +192 val_192 val_192 +194 val_194 val_194 +196 val_196 val_196 +200 val_200 val_200 +200 val_200 val_200 +200 val_200 val_200 +200 val_200 val_200 +202 val_202 val_202 +208 val_208 val_208 +208 val_208 val_208 +208 val_208 val_208 +208 val_208 val_208 +208 val_208 val_208 +208 val_208 val_208 +208 val_208 val_208 +208 val_208 val_208 +208 val_208 val_208 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +217 val_217 val_217 +217 val_217 val_217 +217 val_217 val_217 +217 val_217 val_217 +219 val_219 val_219 +219 val_219 val_219 +219 val_219 val_219 +219 val_219 val_219 +222 val_222 val_222 +224 val_224 val_224 +224 val_224 val_224 +224 val_224 val_224 +224 val_224 val_224 +226 val_226 val_226 +228 val_228 val_228 +233 val_233 val_233 +233 val_233 val_233 +233 val_233 val_233 +233 val_233 val_233 +235 val_235 val_235 +237 val_237 val_237 +237 val_237 val_237 +237 val_237 val_237 +237 val_237 val_237 +239 val_239 val_239 +239 val_239 val_239 +239 val_239 val_239 +239 val_239 val_239 +242 val_242 val_242 +242 val_242 val_242 +242 val_242 val_242 +242 val_242 val_242 +244 val_244 val_244 +248 val_248 val_248 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +257 val_257 val_257 +260 val_260 val_260 +262 val_262 val_262 +266 val_266 val_266 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +275 val_275 val_275 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +280 val_280 val_280 +280 val_280 val_280 +280 val_280 val_280 +280 val_280 val_280 +282 val_282 val_282 +282 val_282 val_282 +282 val_282 val_282 +282 val_282 val_282 +284 val_284 val_284 +286 val_286 val_286 +288 val_288 val_288 +288 val_288 val_288 +288 val_288 val_288 +288 val_288 val_288 +291 val_291 val_291 +305 val_305 val_305 +307 val_307 val_307 +307 val_307 val_307 +307 val_307 val_307 +307 val_307 val_307 +309 val_309 val_309 +309 val_309 val_309 +309 val_309 val_309 +309 val_309 val_309 +310 val_310 val_310 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +321 val_321 val_321 +321 val_321 val_321 +321 val_321 val_321 +321 val_321 val_321 +323 val_323 val_323 +325 val_325 val_325 +325 val_325 val_325 +325 val_325 val_325 +325 val_325 val_325 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +332 val_332 val_332 +336 val_336 val_336 +338 val_338 val_338 +341 val_341 val_341 +345 val_345 val_345 +356 val_356 val_356 +365 val_365 val_365 +367 val_367 val_367 +367 val_367 val_367 +367 val_367 val_367 +367 val_367 val_367 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +374 val_374 val_374 +378 val_378 val_378 +389 val_389 val_389 +392 val_392 val_392 +394 val_394 val_394 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +400 val_400 val_400 +402 val_402 val_402 +404 val_404 val_404 +404 val_404 val_404 +404 val_404 val_404 +404 val_404 val_404 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +411 val_411 val_411 +413 val_413 val_413 +413 val_413 val_413 +413 val_413 val_413 +413 val_413 val_413 +417 val_417 val_417 +417 val_417 val_417 +417 val_417 val_417 +417 val_417 val_417 +417 val_417 val_417 +417 val_417 val_417 +417 val_417 val_417 +417 val_417 val_417 +417 val_417 val_417 +419 val_419 val_419 +424 val_424 val_424 +424 val_424 val_424 +424 val_424 val_424 +424 val_424 val_424 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +435 val_435 val_435 +437 val_437 val_437 +439 val_439 val_439 +439 val_439 val_439 +439 val_439 val_439 +439 val_439 val_439 +444 val_444 val_444 +446 val_446 val_446 +448 val_448 val_448 +453 val_453 val_453 +455 val_455 val_455 +457 val_457 val_457 +459 val_459 val_459 +459 val_459 val_459 +459 val_459 val_459 +459 val_459 val_459 +460 val_460 val_460 +462 val_462 val_462 +462 val_462 val_462 +462 val_462 val_462 +462 val_462 val_462 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +475 val_475 val_475 +477 val_477 val_477 +479 val_479 val_479 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +482 val_482 val_482 +484 val_484 val_484 +491 val_491 val_491 +493 val_493 val_493 +495 val_495 val_495 +497 val_497 val_497 +PREHOOK: query: explain +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (CUSTOM_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Input: default@tab_part +PREHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +400 val_400 val_400 +448 val_448 val_448 +84 val_84 val_84 +84 val_84 val_84 +172 val_172 val_172 +172 val_172 val_172 +136 val_136 val_136 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +444 val_444 val_444 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +44 val_44 val_44 +80 val_80 val_80 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +480 val_480 val_480 +480 val_480 val_480 +480 val_480 val_480 +228 val_228 val_228 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +392 val_392 val_392 +224 val_224 val_224 +224 val_224 val_224 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +404 val_404 val_404 +404 val_404 val_404 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +172 val_172 val_172 +172 val_172 val_172 +8 val_8 val_8 +84 val_84 val_84 +84 val_84 val_84 +404 val_404 val_404 +404 val_404 val_404 +260 val_260 val_260 +176 val_176 val_176 +176 val_176 val_176 +176 val_176 val_176 +176 val_176 val_176 +356 val_356 val_356 +4 val_4 val_4 +316 val_316 val_316 +316 val_316 val_316 +316 val_316 val_316 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +396 val_396 val_396 +396 val_396 val_396 +396 val_396 val_396 +224 val_224 val_224 +224 val_224 val_224 +484 val_484 val_484 +200 val_200 val_200 +200 val_200 val_200 +152 val_152 val_152 +152 val_152 val_152 +28 val_28 val_28 +460 val_460 val_460 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +248 val_248 val_248 +200 val_200 val_200 +200 val_200 val_200 +24 val_24 val_24 +24 val_24 val_24 +424 val_424 val_424 +424 val_424 val_424 +424 val_424 val_424 +424 val_424 val_424 +280 val_280 val_280 +280 val_280 val_280 +244 val_244 val_244 +288 val_288 val_288 +288 val_288 val_288 +116 val_116 val_116 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +64 val_64 val_64 +336 val_336 val_336 +288 val_288 val_288 +288 val_288 val_288 +196 val_196 val_196 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +156 val_156 val_156 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +468 val_468 val_468 +24 val_24 val_24 +24 val_24 val_24 +208 val_208 val_208 +208 val_208 val_208 +208 val_208 val_208 +284 val_284 val_284 +332 val_332 val_332 +192 val_192 val_192 +208 val_208 val_208 +208 val_208 val_208 +208 val_208 val_208 +280 val_280 val_280 +280 val_280 val_280 +20 val_20 val_20 +208 val_208 val_208 +208 val_208 val_208 +208 val_208 val_208 +152 val_152 val_152 +152 val_152 val_152 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +477 val_477 val_477 +37 val_37 val_37 +37 val_37 val_37 +341 val_341 val_341 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +305 val_305 val_305 +125 val_125 val_125 +125 val_125 val_125 +217 val_217 val_217 +217 val_217 val_217 +257 val_257 val_257 +33 val_33 val_33 +389 val_389 val_389 +309 val_309 val_309 +309 val_309 val_309 +77 val_77 val_77 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +165 val_165 val_165 +165 val_165 val_165 +129 val_129 val_129 +129 val_129 val_129 +217 val_217 val_217 +217 val_217 val_217 +213 val_213 val_213 +213 val_213 val_213 +437 val_437 val_437 +125 val_125 val_125 +125 val_125 val_125 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +169 val_169 val_169 +129 val_129 val_129 +129 val_129 val_129 +345 val_345 val_345 +309 val_309 val_309 +309 val_309 val_309 +37 val_37 val_37 +37 val_37 val_37 +213 val_213 val_213 +213 val_213 val_213 +165 val_165 val_165 +165 val_165 val_165 +97 val_97 val_97 +97 val_97 val_97 +97 val_97 val_97 +97 val_97 val_97 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +189 val_189 val_189 +233 val_233 val_233 +233 val_233 val_233 +493 val_493 val_493 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +325 val_325 val_325 +325 val_325 val_325 +417 val_417 val_417 +417 val_417 val_417 +417 val_417 val_417 +237 val_237 val_237 +237 val_237 val_237 +105 val_105 val_105 +53 val_53 val_53 +321 val_321 val_321 +321 val_321 val_321 +453 val_453 val_453 +149 val_149 val_149 +149 val_149 val_149 +321 val_321 val_321 +321 val_321 val_321 +233 val_233 val_233 +233 val_233 val_233 +497 val_497 val_497 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +413 val_413 val_413 +413 val_413 val_413 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +457 val_457 val_457 +181 val_181 val_181 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +149 val_149 val_149 +149 val_149 val_149 +57 val_57 val_57 +17 val_17 val_17 +325 val_325 val_325 +325 val_325 val_325 +365 val_365 val_365 +417 val_417 val_417 +417 val_417 val_417 +417 val_417 val_417 +413 val_413 val_413 +413 val_413 val_413 +237 val_237 val_237 +237 val_237 val_237 +417 val_417 val_417 +417 val_417 val_417 +417 val_417 val_417 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +277 val_277 val_277 +145 val_145 val_145 +369 val_369 val_369 +369 val_369 val_369 +369 val_369 val_369 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +194 val_194 val_194 +462 val_462 val_462 +462 val_462 val_462 +26 val_26 val_26 +26 val_26 val_26 +462 val_462 val_462 +462 val_462 val_462 +114 val_114 val_114 +190 val_190 val_190 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +202 val_202 val_202 +242 val_242 val_242 +242 val_242 val_242 +242 val_242 val_242 +242 val_242 val_242 +118 val_118 val_118 +118 val_118 val_118 +282 val_282 val_282 +282 val_282 val_282 +118 val_118 val_118 +118 val_118 val_118 +282 val_282 val_282 +282 val_282 val_282 +26 val_26 val_26 +26 val_26 val_26 +158 val_158 val_158 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +286 val_286 val_286 +378 val_378 val_378 +466 val_466 val_466 +466 val_466 val_466 +466 val_466 val_466 +338 val_338 val_338 +374 val_374 val_374 +66 val_66 val_66 +150 val_150 val_150 +222 val_222 val_222 +134 val_134 val_134 +134 val_134 val_134 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +310 val_310 val_310 +178 val_178 val_178 +262 val_262 val_262 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +2 val_2 val_2 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +42 val_42 val_42 +42 val_42 val_42 +402 val_402 val_402 +226 val_226 val_226 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +134 val_134 val_134 +134 val_134 val_134 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +42 val_42 val_42 +42 val_42 val_42 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +318 val_318 val_318 +318 val_318 val_318 +318 val_318 val_318 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +138 val_138 val_138 +170 val_170 val_170 +266 val_266 val_266 +174 val_174 val_174 +174 val_174 val_174 +174 val_174 val_174 +174 val_174 val_174 +482 val_482 val_482 +394 val_394 val_394 +446 val_446 val_446 +82 val_82 val_82 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +86 val_86 val_86 +183 val_183 val_183 +323 val_323 val_323 +491 val_491 val_491 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +235 val_235 val_235 +239 val_239 val_239 +239 val_239 val_239 +367 val_367 val_367 +367 val_367 val_367 +103 val_103 val_103 +103 val_103 val_103 +143 val_143 val_143 +11 val_11 val_11 +95 val_95 val_95 +95 val_95 val_95 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +19 val_19 val_19 +15 val_15 val_15 +15 val_15 val_15 +419 val_419 val_419 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +95 val_95 val_95 +95 val_95 val_95 +51 val_51 val_51 +51 val_51 val_51 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +411 val_411 val_411 +275 val_275 val_275 +239 val_239 val_239 +239 val_239 val_239 +103 val_103 val_103 +103 val_103 val_103 +51 val_51 val_51 +51 val_51 val_51 +459 val_459 val_459 +459 val_459 val_459 +187 val_187 val_187 +187 val_187 val_187 +187 val_187 val_187 +455 val_455 val_455 +367 val_367 val_367 +367 val_367 val_367 +459 val_459 val_459 +459 val_459 val_459 +15 val_15 val_15 +15 val_15 val_15 +327 val_327 val_327 +327 val_327 val_327 +327 val_327 val_327 +495 val_495 val_495 +307 val_307 val_307 +307 val_307 val_307 +167 val_167 val_167 +167 val_167 val_167 +167 val_167 val_167 +479 val_479 val_479 +439 val_439 val_439 +439 val_439 val_439 +163 val_163 val_163 +255 val_255 val_255 +255 val_255 val_255 +291 val_291 val_291 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +219 val_219 val_219 +219 val_219 val_219 +167 val_167 val_167 +167 val_167 val_167 +167 val_167 val_167 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +435 val_435 val_435 +307 val_307 val_307 +307 val_307 val_307 +431 val_431 val_431 +431 val_431 val_431 +431 val_431 val_431 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +475 val_475 val_475 +167 val_167 val_167 +167 val_167 val_167 +167 val_167 val_167 +439 val_439 val_439 +439 val_439 val_439 +219 val_219 val_219 +219 val_219 val_219 +255 val_255 val_255 +255 val_255 val_255 PREHOOK: query: explain select count(*) from -(select distinct key, value from tab_part) a join tab b on a.key = b.key +(select distinct key from tab_part) a join tab b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select count(*) from -(select distinct key, value from tab_part) a join tab b on a.key = b.key +(select distinct key from tab_part) a join tab b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -216,90 +1346,93 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: key (type: int), value (type: string) + keys: key (type: int) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: string) + keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 237 Data size: 44935 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 237 Data size: 44935 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 260 Data size: 49428 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -313,7 +1446,7 @@ STAGE PLANS: PREHOOK: query: select count(*) from -(select distinct key, value from tab_part) a join tab b on a.key = b.key +(select distinct key from tab_part) a join tab b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@tab PREHOOK: Input: default@tab@ds=2008-04-08 @@ -322,7 +1455,7 @@ PREHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: select count(*) from -(select distinct key, value from tab_part) a join tab b on a.key = b.key +(select distinct key from tab_part) a join tab b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tab POSTHOOK: Input: default@tab@ds=2008-04-08 @@ -333,16 +1466,12 @@ POSTHOOK: Input: default@tab_part@ds=2008-04-08 PREHOOK: query: explain select count(*) from -(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c -join -tab_part d on c.key = d.key +(select distinct key from tab_part) a join tab b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select count(*) from -(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c -join -tab_part d on c.key = d.key +(select distinct key from tab_part) a join tab b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -353,105 +1482,86 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: int) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,9 +1575,7 @@ STAGE PLANS: PREHOOK: query: select count(*) from -(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c -join -tab_part d on c.key = d.key +(select distinct key from tab_part) a join tab b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@tab PREHOOK: Input: default@tab@ds=2008-04-08 @@ -476,29 +1584,27 @@ PREHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: select count(*) from -(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c -join -tab_part d on c.key = d.key +(select distinct key from tab_part) a join tab b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tab POSTHOOK: Input: default@tab@ds=2008-04-08 POSTHOOK: Input: default@tab_part POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### -1166 +242 PREHOOK: query: explain select count(*) from -tab_part d +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c join -(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +tab_part d on c.key = d.key PREHOOK: type: QUERY POSTHOOK: query: explain select count(*) from -tab_part d +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c join -(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +tab_part d on c.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -509,22 +1615,196 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: d + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 632 Data size: 5056 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) +from +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c +join +tab_part d on c.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Input: default@tab_part +PREHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c +join +tab_part d on c.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +1166 +PREHOOK: query: explain +select count(*) +from +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c +join +tab_part d on c.key = d.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) +from +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c +join +tab_part d on c.key = d.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan alias: b - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -533,12 +1813,12 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 input vertices: - 1 Map 3 - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE + 1 Map 4 + Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -546,68 +1826,49 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) input vertices: - 1 Map 4 - Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE + 0 Map 1 + Statistics: Num rows: 632 Data size: 5056 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs Map 4 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -621,9 +1882,9 @@ STAGE PLANS: PREHOOK: query: select count(*) from -tab_part d +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c join -(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +tab_part d on c.key = d.key PREHOOK: type: QUERY PREHOOK: Input: default@tab PREHOOK: Input: default@tab@ds=2008-04-08 @@ -632,9 +1893,9 @@ PREHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: select count(*) from -tab_part d +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c join -(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +tab_part d on c.key = d.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tab POSTHOOK: Input: default@tab@ds=2008-04-08 @@ -643,14 +1904,1243 @@ POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### 1166 PREHOOK: query: explain -select a.k1, a.v1, b.value -from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a -join tab b on a.k1 = b.key +select count(*) +from +tab_part d +join +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) +from +tab_part d +join +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 632 Data size: 5056 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) +from +tab_part d +join +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Input: default@tab_part +PREHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from +tab_part d +join +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +1166 +PREHOOK: query: explain +select count(*) +from +tab_part d +join +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) +from +tab_part d +join +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 4 + Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 0 Map 1 + Statistics: Num rows: 632 Data size: 5056 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) +from +tab_part d +join +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Input: default@tab_part +PREHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from +tab_part d +join +(select a.key as key, a.value as value from tab a join tab_part b on a.key = b.key) c on c.key = d.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +1166 +PREHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab b on a.k1 = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab b on a.k1 = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_mapjoin + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 191 Data size: 19673 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 191 Data size: 19673 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 191 Data size: 19673 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a + join tab b on a.k1 = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a + join tab b on a.k1 = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcbucket_mapjoin + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 191 Data size: 19673 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 191 Data size: 19673 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 191 Data size: 19673 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a +join tab b on a.k1 = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a +join tab b on a.k1 = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), substr(_col2, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a + join tab b on a.k1 = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a + join tab b on a.k1 = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 4 (CUSTOM_EDGE) + Map 3 <- Map 2 (CUSTOM_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: tab + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + input vertices: + 0 Map 2 + Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), substr(_col2, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a +join tab_part b on a.k1 = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a +join tab_part b on a.k1 = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 3 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 382 Data size: 36290 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), substr(_col1, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 382 Data size: 36290 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a + join tab_part b on a.k1 = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.k1, a.v1, b.value +from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a + join tab_part b on a.k1 = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 3 (CUSTOM_EDGE) + Map 2 <- Map 4 (CUSTOM_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 4 + Statistics: Num rows: 382 Data size: 36290 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), substr(_col1, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 382 Data size: 36290 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, a.value, b.value +from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.value, b.value +from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, a.value, b.value +from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key PREHOOK: type: QUERY POSTHOOK: query: explain -select a.k1, a.v1, b.value -from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a -join tab b on a.k1 = b.key +select a.key, a.value, b.value +from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -661,90 +3151,86 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: srcbucket_mapjoin - Statistics: Num rows: 27 Data size: 7884 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), substr(value, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4 input vertices: - 0 Reducer 2 - Statistics: Num rows: 253 Data size: 48084 Basic stats: COMPLETE Column stats: NONE + 1 Map 2 + 2 Map 3 + Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 253 Data size: 48084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 253 Data size: 48084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs - Reducer 2 + Map 2 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 3796 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 3796 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 13 Data size: 3796 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs Stage: Stage-0 Fetch Operator @@ -753,14 +3239,12 @@ STAGE PLANS: ListSink PREHOOK: query: explain -select a.k1, a.v1, b.value -from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a -join tab b on a.k1 = b.key +select a.key, a.value, c.value +from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key PREHOOK: type: QUERY POSTHOOK: query: explain -select a.k1, a.v1, b.value -from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on tab_part.key = tab.key GROUP BY tab.key) a -join tab b on a.k1 = b.key +select a.key, a.value, c.value +from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -771,124 +3255,101 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 4 (CUSTOM_EDGE) - Map 3 <- Map 2 (CUSTOM_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2 + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 input vertices: - 1 Reducer 4 - Statistics: Num rows: 287 Data size: 1093 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287 Data size: 1093 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 287 Data size: 1093 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 3 + Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan - alias: tab_part - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan - alias: tab - Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 0 Map 2 - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), substr(_col2, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 261 Data size: 994 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: double), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 261 Data size: 994 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 261 Data size: 994 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -897,14 +3358,12 @@ STAGE PLANS: ListSink PREHOOK: query: explain -select a.k1, a.v1, b.value -from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a -join tab_part b on a.k1 = b.key +select a.key, a.value, c.value +from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key PREHOOK: type: QUERY POSTHOOK: query: explain -select a.k1, a.v1, b.value -from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on x.key = y.key GROUP BY x.key) a -join tab_part b on a.k1 = b.key +select a.key, a.value, c.value +from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -915,124 +3374,93 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 3 (CUSTOM_EDGE) - Map 2 <- Map 4 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Reducer 3 - Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan alias: x - Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 253 Data size: 45401 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), substr(_col1, 5) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 253 Data size: 45401 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 253 Data size: 45401 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 253 Data size: 45401 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: y - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 3 + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 126 Data size: 22610 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 126 Data size: 22610 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 126 Data size: 22610 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + LLAP IO: no inputs Stage: Stage-0 Fetch Operator @@ -1042,11 +3470,13 @@ STAGE PLANS: PREHOOK: query: explain select a.key, a.value, b.value -from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab_part b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, a.value, b.value -from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1057,88 +3487,89 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + alias: srcbucket_mapjoin + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) + expressions: key (type: int), substr(value, 5) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 1045 Data size: 187028 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1045 Data size: 187028 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1045 Data size: 187028 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1151,12 +3582,14 @@ STAGE PLANS: ListSink PREHOOK: query: explain -select a.key, a.value, c.value -from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key +select a.key, a.value, b.value +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a + join tab_part b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain -select a.key, a.value, c.value -from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key +select a.key, a.value, b.value +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a + join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1167,93 +3600,193 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + alias: srcbucket_mapjoin + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 - Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 574 Data size: 102865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 574 Data size: 102865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 574 Data size: 102865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 Reducer 2 + Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs - Map 2 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 1452 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, a.value, b.value +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab_part b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.value, b.value +from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a +join tab_part b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + alias: srcbucket_mapjoin + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap - LLAP IO: no inputs + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1264,12 +3797,12 @@ STAGE PLANS: PREHOOK: query: explain select a.key, a.value, b.value from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a -join tab_part b on a.key = b.key + join tab_part b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, a.value, b.value from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a -join tab_part b on a.key = b.key + join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1288,40 +3821,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_mapjoin - Statistics: Num rows: 27 Data size: 7884 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), substr(value, 5) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1331,14 +3858,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1351,14 +3878,14 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: int) - mode: mergepartial + mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 3796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 3796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 153 Data size: 1836 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double) Stage: Stage-0 @@ -1369,13 +3896,11 @@ STAGE PLANS: PREHOOK: query: explain select a.key, a.value, b.value -from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a -join tab_part b on a.key = b.key +from tab a join tab_part b on a.value = b.value PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, a.value, b.value -from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket_mapjoin GROUP BY srcbucket_mapjoin.key) a -join tab_part b on a.key = b.key +from tab a join tab_part b on a.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1386,80 +3911,66 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: srcbucket_mapjoin - Statistics: Num rows: 27 Data size: 7884 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), substr(value, 5) (type: string) + expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 7592 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 3796 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 3796 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1491,19 +4002,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1511,19 +4022,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 475 Data size: 88251 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 88251 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 475 Data size: 88251 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1536,10 +4047,10 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 522 Data size: 97076 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 522 Data size: 97076 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1617,19 +4128,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1643,14 +4154,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1662,6 +4173,88 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain +select a.key, a.value, b.value +from tab1 a join tab_part b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.value, b.value +from tab1 a join tab_part b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (CUSTOM_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value @@ -1675,94 +4268,216 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Map 3 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan alias: c - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 2 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col1 input vertices: - 0 Map 1 - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE + 1 Map 3 + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col2 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan + alias: c + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan alias: b - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1, _col3 - input vertices: - 0 Map 2 - Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1794,19 +4509,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 90522 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 83680 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 230 Data size: 83680 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 230 Data size: 83680 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1814,34 +4529,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 186800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 172376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 475 Data size: 172376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 475 Data size: 172376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 172376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 237 Data size: 86096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Dynamic Partitioning Event Operator Target column: ds (string) Target Input: a Partition key expr: ds - Statistics: Num rows: 237 Data size: 86096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Target Vertex: Map 1 Execution mode: llap LLAP IO: no inputs @@ -1855,14 +4570,14 @@ STAGE PLANS: 0 _col0 (type: int), _col2 (type: string) 1 _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 1150 Data size: 421688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1150 Data size: 421688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1150 Data size: 421688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1874,27 +4589,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08') -select key,value from srcbucket_mapjoin where key = 411 -PREHOOK: type: QUERY -PREHOOK: Input: default@srcbucket_mapjoin -PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 -PREHOOK: Output: default@tab@ds=2008-04-08 -POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08') -select key,value from srcbucket_mapjoin where key = 411 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcbucket_mapjoin -POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 -POSTHOOK: Output: default@tab@ds=2008-04-08 -POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [] -POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: explain -select count(*) -from tab_part a join tab b on a.key = b.key +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key and a.ds = b.ds PREHOOK: type: QUERY POSTHOOK: query: explain -select count(*) -from tab_part a join tab b on a.key = b.key +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key and a.ds = b.ds POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1905,189 +4606,86 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 7216 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 6855 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 6855 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 522 Data size: 7540 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 522 Data size: 7540 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count() - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) -from tab_part a join tab b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@tab -PREHOOK: Input: default@tab@ds=2008-04-08 -PREHOOK: Input: default@tab_part -PREHOOK: Input: default@tab_part@ds=2008-04-08 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) -from tab_part a join tab b on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tab -POSTHOOK: Input: default@tab@ds=2008-04-08 -POSTHOOK: Input: default@tab_part -POSTHOOK: Input: default@tab_part@ds=2008-04-08 -#### A masked pattern was here #### -1 -PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08') -select key,value from srcbucket_mapjoin where key = 411 -PREHOOK: type: QUERY -PREHOOK: Input: default@srcbucket_mapjoin -PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 -PREHOOK: Output: default@tab@ds=2008-04-08 -POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08') -select key,value from srcbucket_mapjoin where key = 411 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcbucket_mapjoin -POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 -POSTHOOK: Output: default@tab@ds=2008-04-08 -POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [] -POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: explain -select count(*) -from tab_part a join tab b on a.key = b.key -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(*) -from tab_part a join tab b on a.key = b.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 7216 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 6855 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 6855 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 522 Data size: 7540 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 522 Data size: 7540 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Select Operator + expressions: _col2 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: a + Partition key expr: ds + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: count() - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: string) + 1 _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2095,20 +4693,3 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) -from tab_part a join tab b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@tab -PREHOOK: Input: default@tab@ds=2008-04-08 -PREHOOK: Input: default@tab_part -PREHOOK: Input: default@tab_part@ds=2008-04-08 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) -from tab_part a join tab b on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tab -POSTHOOK: Input: default@tab@ds=2008-04-08 -POSTHOOK: Input: default@tab_part -POSTHOOK: Input: default@tab_part@ds=2008-04-08 -#### A masked pattern was here #### -1 diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out index 57c792f654..e2cee7fd0c 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out @@ -108,6 +108,62 @@ POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: Output: default@tab@ds=2008-04-08 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: analyze table srcbucket_mapjoin compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +PREHOOK: Output: default@srcbucket_mapjoin +PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcbucket_mapjoin compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 +POSTHOOK: Output: default@srcbucket_mapjoin +POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: analyze table srcbucket_mapjoin_part compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcbucket_mapjoin_part compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@srcbucket_mapjoin_part +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: analyze table tab compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Output: default@tab +PREHOOK: Output: default@tab@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table tab compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Output: default@tab +POSTHOOK: Output: default@tab@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: analyze table tab_part compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@tab_part +PREHOOK: Input: default@tab_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part +PREHOOK: Output: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table tab_part compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab_part +POSTHOOK: Input: default@tab_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part +POSTHOOK: Output: default@tab_part@ds=2008-04-08 +#### A masked pattern was here #### PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value @@ -121,94 +177,216 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Map 3 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan alias: c - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 2 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col1 input vertices: - 0 Map 1 - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE + 1 Map 3 + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col2 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan + alias: c + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan alias: b - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1, _col3 - input vertices: - 0 Map 2 - Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -238,6 +416,16 @@ POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: Output: default@tab1 POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: analyze table tab1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@tab1 +PREHOOK: Output: default@tab1 +#### A masked pattern was here #### +POSTHOOK: query: analyze table tab1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab1 +POSTHOOK: Output: default@tab1 +#### A masked pattern was here #### PREHOOK: query: explain select a.key, a.value, b.value from tab1 a join src b on a.key = b.key @@ -262,19 +450,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -308,14 +496,14 @@ STAGE PLANS: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -328,10 +516,12 @@ STAGE PLANS: ListSink PREHOOK: query: explain -select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key +select a.key, a.value, b.value +from tab1 a join src b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain -select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key +select a.key, a.value, b.value +from tab1 a join src b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -342,59 +532,71 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: tab_part - Statistics: Num rows: 500 Data size: 7216 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan - alias: tab_part - Statistics: Num rows: 500 Data size: 7216 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: UDFToDouble(_col0) (type: double) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -403,10 +605,10 @@ STAGE PLANS: ListSink PREHOOK: query: explain -select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key +select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain -select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key +select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -417,59 +619,536 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 7216 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key > 1) (type: boolean) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap - LLAP IO: no inputs + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (CUSTOM_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs Map 2 Map Operator Tree: TableScan - alias: tab_part - Statistics: Num rows: 500 Data size: 7216 Basic stats: COMPLETE Column stats: NONE + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 1) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (CUSTOM_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 1) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (CUSTOM_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -477,11 +1156,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain -select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key +PREHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key PREHOOK: type: QUERY -POSTHOOK: query: explain -select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key +POSTHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -492,59 +1169,74 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: tab_part - Statistics: Num rows: 500 Data size: 7216 Basic stats: COMPLETE Column stats: NONE + alias: tab + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: int) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan - alias: tab_part - Statistics: Num rows: 500 Data size: 7216 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Right Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + 0 Reducer 2 + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -552,9 +1244,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key +PREHOOK: query: explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value PREHOOK: type: QUERY -POSTHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key +POSTHOOK: query: explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -565,69 +1257,82 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab - Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: key (type: int) + keys: value (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 11011 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 121 Data size: 11011 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: UDFToDouble(_col0) (type: double) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 115 Data size: 1658 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Statistics: Num rows: 121 Data size: 11011 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 121 Data size: 11011 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 253 Data size: 3647 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 253 Data size: 3647 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -660,39 +1365,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tab - Statistics: Num rows: 242 Data size: 45070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 230 Data size: 42835 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 42835 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 11011 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 230 Data size: 42835 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 11011 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -703,12 +1408,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 115 Data size: 21417 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 11011 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 115 Data size: 21417 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 11011 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Reducer 3 Execution mode: llap @@ -720,14 +1425,14 @@ STAGE PLANS: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col2 - Statistics: Num rows: 253 Data size: 48084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 253 Data size: 48084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 253 Data size: 48084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -739,3 +1444,266 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: CREATE TABLE tab_part1 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key, value) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab_part1 +POSTHOOK: query: CREATE TABLE tab_part1 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key, value) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab_part1 +PREHOOK: query: insert overwrite table tab_part1 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part1@ds=2008-04-08 +POSTHOOK: query: insert overwrite table tab_part1 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part1@ds=2008-04-08 +POSTHOOK: Lineage: tab_part1 PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tab_part1 PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: analyze table tab_part1 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@tab_part1 +PREHOOK: Input: default@tab_part1@ds=2008-04-08 +PREHOOK: Output: default@tab_part1 +PREHOOK: Output: default@tab_part1@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table tab_part1 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab_part1 +POSTHOOK: Input: default@tab_part1@ds=2008-04-08 +POSTHOOK: Output: default@tab_part1 +POSTHOOK: Output: default@tab_part1@ds=2008-04-08 +#### A masked pattern was here #### +PREHOOK: query: explain +select count(*) +from +(select distinct key,value from tab_part) a join tab b on a.key = b.key and a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) +from +(select distinct key,value from tab_part) a join tab b on a.key = b.key and a.value = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: int), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select count(*) +from +(select distinct key,value from tab_part) a join tab b on a.key = b.key and a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(*) +from +(select distinct key,value from tab_part) a join tab b on a.key = b.key and a.value = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tab_part + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: int), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +