diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java
index 3dbdba6..b2369fa 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java
@@ -19,7 +19,6 @@
 package org.apache.hadoop.hive.ql.plan;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -80,7 +79,7 @@ public void addMergedWork(BaseWork work, BaseWork connectWork) {
     }
   }
 
-  @Explain(skipHeader=true, displayName = "Join")
+  @Explain(skipHeader = true, displayName = "Join")
   public List<BaseWork> getBaseWorkList() {
     return mergeWorkList;
   }
@@ -89,7 +88,7 @@ public String getBigTableAlias() {
     return ((MapWork) bigTableWork).getAliasToWork().keySet().iterator().next();
   }
 
-  @Explain(skipHeader=true, displayName = "Main")
+  @Explain(skipHeader = true, displayName = "Main")
   public BaseWork getMainWork() {
     return bigTableWork;
   }
@@ -103,4 +102,9 @@ public void setDummyOps(List<HashTableDummyOperator> dummyOps) {
   public void addDummyOp(HashTableDummyOperator dummyOp) {
     getMainWork().addDummyOp(dummyOp);
   }
+
+  @Override
+  public List<HashTableDummyOperator> getDummyOps() {
+    return getMainWork().getDummyOps();
+  }
 }
diff --git ql/src/test/queries/clientpositive/tez_join_hash.q ql/src/test/queries/clientpositive/tez_join_hash.q
index 8b15268..4390fa4 100644
--- ql/src/test/queries/clientpositive/tez_join_hash.q
+++ ql/src/test/queries/clientpositive/tez_join_hash.q
@@ -10,3 +10,18 @@ explain
 SELECT count(*) FROM src, orc_src where src.key=orc_src.key;
 
 SELECT count(*) FROM src, orc_src where src.key=orc_src.key;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=3000;
+
+explain
+select count(*) from (select x.key as key, y.value as value from
+srcpart x join srcpart y on (x.key = y.key)
+union all
+select key, value from srcpart z) a join src b on (a.value = b.value) group by a.key, a.value;
+
+select count(*) from (select x.key as key, y.value as value from
+srcpart x join srcpart y on (x.key = y.key)
+union all
+select key, value from srcpart z) a join src b on (a.value = b.value) group by a.key, a.value;
diff --git ql/src/test/results/clientpositive/tez/tez_join_hash.q.out ql/src/test/results/clientpositive/tez/tez_join_hash.q.out
index e698d72..a270c81 100644
--- ql/src/test/results/clientpositive/tez/tez_join_hash.q.out
+++ ql/src/test/results/clientpositive/tez/tez_join_hash.q.out
@@ -122,3 +122,506 @@ POSTHOOK: Input: default@orc_src
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 1028
+PREHOOK: query: explain
+select count(*) from (select x.key as key, y.value as value from
+srcpart x join srcpart y on (x.key = y.key)
+union all
+select key, value from srcpart z) a join src b on (a.value = b.value) group by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select x.key as key, y.value as value from
+srcpart x join srcpart y on (x.key = y.key)
+union all
+select key, value from srcpart z) a join src b on (a.value = b.value) group by a.key, a.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS)
+        Reducer 3 <- Union 2 (SIMPLE_EDGE)
+        Reducer 6 <- Map 4 (BROADCAST_EDGE), Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 2 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: z
+                  Filter Operator
+                    predicate: value is not null (type: boolean)
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        condition expressions:
+                          0 {_col0} {_col1}
+                          1
+                        keys:
+                          0 _col1 (type: string)
+                          1 value (type: string)
+                        outputColumnNames: _col0, _col1
+                        input vertices:
+                          1 Map 4
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: _col0, _col1
+                          Group By Operator
+                            aggregations: count()
+                            keys: _col0 (type: string), _col1 (type: string)
+                            mode: hash
+                            outputColumnNames: _col0, _col1, _col2
+                            Reduce Output Operator
+                              key expressions: _col0 (type: string), _col1 (type: string)
+                              sort order: ++
+                              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                              value expressions: _col2 (type: bigint)
+        Map 4
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: value (type: string)
+                      sort order: +
+                      Map-reduce partition columns: value (type: string)
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: value (type: string)
+                      sort order: +
+                      Map-reduce partition columns: value (type: string)
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: y
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (key is not null and value is not null) (type: boolean)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
+        Map 7
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1155 Data size: 12270 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1155 Data size: 12270 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1155 Data size: 12270 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Reducer 6
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 {VALUE._col0}
+                outputColumnNames: _col0, _col8
+                Select Operator
+                  expressions: _col0 (type: string), _col8 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {_col0} {_col1}
+                      1
+                    keys:
+                      0 _col1 (type: string)
+                      1 value (type: string)
+                    outputColumnNames: _col0, _col1
+                    input vertices:
+                      1 Map 4
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: _col0, _col1
+                      Group By Operator
+                        aggregations: count()
+                        keys: _col0 (type: string), _col1 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                          value expressions: _col2 (type: bigint)
+        Union 2
+            Vertex: Union 2
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select x.key as key, y.value as value from
+srcpart x join srcpart y on (x.key = y.key)
+union all
+select key, value from srcpart z) a join src b on (a.value = b.value) group by a.key, a.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select x.key as key, y.value as value from
+srcpart x join srcpart y on (x.key = y.key)
+union all
+select key, value from srcpart z) a join src b on (a.value = b.value) group by a.key, a.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+468
+20
+144
+20
+20
+144
+144
+144
+20
+144
+20
+20
+20
+144
+20
+20
+20
+20
+20
+20
+144
+20
+20
+20
+144
+20
+20
+20
+20
+20
+144
+468
+144
+20
+20
+144
+144
+144
+144
+144
+144
+144
+144
+20
+144
+144
+20
+144
+144
+144
+20
+20
+20
+20
+144
+20
+144
+20
+20
+144
+20
+144
+468
+20
+20
+144
+20
+20
+20
+468
+20
+20
+20
+20
+144
+20
+144
+20
+468
+144
+468
+144
+144
+20
+144
+20
+20
+20
+144
+20
+144
+20
+20
+20
+144
+20
+468
+144
+20
+20
+20
+20
+20
+144
+468
+20
+20
+20
+144
+468
+144
+20
+2100
+20
+1088
+20
+20
+144
+144
+144
+468
+468
+20
+20
+20
+20
+20
+20
+144
+144
+468
+20
+1088
+20
+20
+468
+20
+20
+20
+20
+20
+20
+20
+20
+20
+20
+468
+468
+144
+20
+20
+144
+20
+20
+20
+468
+20
+20
+20
+20
+20
+144
+20
+20
+20
+20
+144
+20
+144
+144
+20
+20
+144
+20
+144
+468
+144
+20
+468
+20
+144
+1088
+144
+144
+144
+20
+20
+20
+144
+468
+20
+1088
+144
+144
+144
+20
+20
+144
+144
+20
+20
+468
+20
+20
+144
+468
+20
+144
+20
+20
+144
+468
+20
+20
+144
+144
+20
+20
+2100
+144
+144
+20
+20
+144
+20
+20
+20
+20
+1088
+144
+144
+144
+20
+20
+20
+20
+144
+20
+20
+20
+468
+144
+20
+468
+144
+20
+20
+20
+20
+144
+2100
+468
+20
+20
+20
+20
+20
+20
+20
+144
+20
+468
+144
+468
+20
+468
+20
+20
+20
+144
+20
+144
+20
+20
+20
+20
+468
+144
+20
+20
+20
+468
+20
+20
+144
+20
+144
+2100
+20
+20
+20
+144
+20
+20
+20
+1088
+20
+144
+20
+20
+144
+144
+144
+20
+144
+20
+468
+144
+144