Index: conf/hive-default.xml.template
===================================================================
--- conf/hive-default.xml.template (revision 1438313)
+++ conf/hive-default.xml.template (working copy)
@@ -814,8 +814,25 @@
Whether Hive enable the optimization about converting common join into mapjoin based on the input file size
+<property>
+  <name>hive.auto.convert.join.aggressivemapjoin</name>
+  <value>false</value>
+  <description>Whether Hive enables the optimization that converts a common join into a mapjoin based on the input file
+  size. If this parameter is on, and the size of n-1 of the tables/partitions for an n-way join is smaller than the
+  specified size, the join is directly converted to a mapjoin (there is no conditional task).
+  </description>
+</property>
+
+<property>
+  <name>hive.auto.convert.join.aggressivemapjoin.size</name>
+  <value>10000</value>
+  <description>If hive.auto.convert.join.aggressivemapjoin is off, this parameter does not take effect. However, if it
+  is on, and the size of n-1 of the tables/partitions for an n-way join is smaller than this size, the join is directly
+  converted to a mapjoin (there is no conditional task).
+  </description>
+</property>
hive.script.auto.progress
false
Whether Hive Tranform/Map/Reduce Clause should automatically send progress information to TaskTracker to avoid the task getting killed because of inactivity. Hive sends progress information when the script is outputting to stderr. This option removes the need of periodically producing stderr messages, but users should be cautious because this may prevent infinite loops in the scripts to be killed by TaskTracker.
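
For reviewers, a minimal usage sketch of the two new parameters, assuming only the property names introduced by this patch; the class, the chosen values, and the surrounding code are illustrative and not part of the patch:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class AggressiveMapJoinSettingsExample {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Existing switch: allow common joins to be auto-converted to mapjoins.
        conf.setBoolean("hive.auto.convert.join", true);
        // New switch from this patch: convert directly, with no conditional (backup) task.
        conf.setBoolean("hive.auto.convert.join.aggressivemapjoin", true);
        // New threshold from this patch: size limit for the n-1 smaller inputs (10000 is the default).
        conf.setLong("hive.auto.convert.join.aggressivemapjoin.size", 10000L);
        System.out.println(conf.get("hive.auto.convert.join.aggressivemapjoin.size"));
      }
    }

In a HiveQL session the same effect would come from the corresponding SET statements before running the query.
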
Index: build.properties
===================================================================
--- build.properties (revision 1438313)
+++ build.properties (working copy)
@@ -79,7 +79,7 @@
# (measured in milliseconds). Ignored if fork is disabled. When running
# multiple tests inside the same Java VM (see forkMode), timeout
# applies to the time that all tests use together, not to an individual test.
-test.junit.timeout=43200000
+test.junit.timeout=432000000
# Use this property to selectively disable tests from the command line:
# ant test -Dtest.junit.exclude="**/TestCliDriver.class"
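(For reference, the previous value of 43200000 ms is 12 hours; the new value of 432000000 ms is 120 hours, i.e. 5 days, applied to the whole forked test run.)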
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1438313)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -37,8 +37,6 @@
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.common.LogUtils;
-import org.apache.hadoop.hive.common.LogUtils.LogInitializationException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.JobConf;
@@ -471,6 +469,8 @@
HIVESKEWJOIN("hive.optimize.skewjoin", false),
HIVECONVERTJOIN("hive.auto.convert.join", false),
+ HIVECONVERTJOINAGGMAPJOIN("hive.auto.convert.join.aggressivemapjoin", false),
+ HIVECONVERTJOINAGGMAPJOINSIZE("hive.auto.convert.join.aggressivemapjoin.size", 10000L),
HIVESKEWJOINKEY("hive.skewjoin.key", 100000),
HIVESKEWJOINMAPJOINNUMMAPTASK("hive.skewjoin.mapjoin.map.tasks", 10000),
HIVESKEWJOINMAPJOINMINSPLIT("hive.skewjoin.mapjoin.min.split", 33554432L), //32M
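
To illustrate how these two new ConfVars are meant to be consulted, here is a hedged sketch; the getter calls follow the existing HiveConf API, but the helper class, method name, and decision logic are illustrative only and are not the patch's actual optimizer code:

    import org.apache.hadoop.hive.conf.HiveConf;

    /** Illustrative sketch only; not part of this patch. */
    public final class AggressiveMapJoinCheck {
      /**
       * Decide whether an n-way join should be compiled directly as a mapjoin
       * (i.e. without a conditional/backup task). smallInputsSize is a hypothetical
       * precomputed size, in bytes, of the n-1 smaller tables/partitions.
       */
      public static boolean convertDirectly(HiveConf conf, long smallInputsSize) {
        boolean aggressive = conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOINAGGMAPJOIN);
        long threshold = conf.getLongVar(HiveConf.ConfVars.HIVECONVERTJOINAGGMAPJOINSIZE);
        return aggressive && smallInputsSize < threshold;
      }
    }

When the check passes, the plan keeps only the mapjoin branch, which is why the .q.out changes below drop the conditional/backup stages for the small-input joins.
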
Index: ql/src/test/results/clientpositive/join29.q.out
===================================================================
--- ql/src/test/results/clientpositive/join29.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/join29.q.out (working copy)
@@ -3,33 +3,36 @@
POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@dest_j1
-PREHOOK: query: EXPLAIN
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt
+SELECT subq1.key, subq1.cnt, subq2.cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt
+SELECT subq1.key, subq1.cnt, subq2.cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) cnt)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) cnt)))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-11 depends on stages: Stage-1, Stage-9
- Stage-2 depends on stages: Stage-11
- Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2
+ Stage-8 has a backup stage: Stage-2
+ Stage-5 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-2, Stage-5, Stage-6
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
- Stage-9 is a root stage
+ Stage-9 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-9
+ Stage-2
+ Stage-4 is a root stage
STAGE PLANS:
Stage: Stage-1
@@ -88,14 +91,17 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Stage: Stage-11
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
-#### A masked pattern was here ####
+ $INTNAME
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
-#### A masked pattern was here ####
+ $INTNAME
HashTable Sink Operator
condition expressions:
0 {_col0} {_col1}
@@ -104,12 +110,12 @@
keys:
0 [Column[_col0]]
1 [Column[_col0]]
- Position of Big Table: 1
+ Position of Big Table: 0
- Stage: Stage-2
+ Stage: Stage-5
Map Reduce
Alias -> Map Operator Tree:
-#### A masked pattern was here ####
+ $INTNAME1
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -121,7 +127,7 @@
0 [Column[_col0]]
1 [Column[_col0]]
outputColumnNames: _col0, _col1, _col3
- Position of Big Table: 1
+ Position of Big Table: 0
Select Operator
expressions:
expr: _col0
@@ -130,45 +136,27 @@
type: bigint
expr: _col3
type: bigint
- outputColumnNames: _col0, _col1, _col3
+ outputColumnNames: _col0, _col1, _col2
Select Operator
expressions:
expr: _col0
type: string
- expr: _col1
- type: bigint
- expr: _col3
- type: bigint
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: UDFToInteger(_col2)
+ type: int
outputColumnNames: _col0, _col1, _col2
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: UDFToInteger(_col1)
- type: int
- expr: UDFToInteger(_col2)
- type: int
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
Local Work:
Map Reduce Local Work
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-0
Move Operator
tables:
@@ -182,39 +170,137 @@
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-4
+ Stage: Stage-9
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 1
+
+ Stage: Stage-6
Map Reduce
Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
+ $INTNAME
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1}
+ 1 {_col1}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0, _col1, _col3
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: UDFToInteger(_col2)
+ type: int
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
+ Local Work:
+ Map Reduce Local Work
- Stage: Stage-6
+ Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 1
+ value expressions:
+ expr: _col1
+ type: bigint
+ $INTNAME1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: UDFToInteger(_col2)
+ type: int
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-9
+ Stage: Stage-4
Map Reduce
Alias -> Map Operator Tree:
subq1:x
@@ -272,7 +358,7 @@
PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt
+SELECT subq1.key, subq1.cnt, subq2.cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
PREHOOK: type: QUERY
@@ -280,7 +366,7 @@
PREHOOK: Input: default@src1
PREHOOK: Output: default@dest_j1
POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt
+SELECT subq1.key, subq1.cnt, subq2.cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
POSTHOOK: type: QUERY
Index: ql/src/test/results/clientpositive/smb_mapjoin_14.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_14.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/smb_mapjoin_14.q.out (working copy)
@@ -55,7 +55,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -76,32 +75,21 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -175,7 +163,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-2
Stage-0 is a root stage
STAGE PLANS:
@@ -197,53 +184,42 @@
1 [Column[key]]
outputColumnNames: _col0
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
Select Operator
expressions:
expr: _col0
type: int
outputColumnNames: _col0
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- keys:
+ Select Operator
+ expressions:
expr: _col0
type: int
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
expr: _col0
type: int
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: int
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -268,7 +244,7 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Stage: Stage-3
+ Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
@@ -359,7 +335,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-2
Stage-0 is a root stage
STAGE PLANS:
@@ -381,53 +356,42 @@
1 [Column[key]]
outputColumnNames: _col0
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
Select Operator
expressions:
expr: _col0
type: int
outputColumnNames: _col0
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- keys:
+ Select Operator
+ expressions:
expr: _col0
type: int
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
expr: _col0
type: int
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: int
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -453,7 +417,7 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Stage: Stage-3
+ Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
@@ -516,382 +480,6 @@
POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
6
-PREHOOK: query: -- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them.
--- Each sub-query should be converted to a sort-merge join.
-explain
-select src1.key, src1.cnt1, src2.cnt1 from
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq1 group by key
-) src1
-join
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq2 group by key
-) src2
-on src1.key = src2.key
-order by src1.key, src1.cnt1, src2.cnt1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them.
--- Each sub-query should be converted to a sort-merge join.
-explain
-select src1.key, src1.cnt1, src2.cnt1 from
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq1 group by key
-) src1
-join
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq2 group by key
-) src2
-on src1.key = src2.key
-order by src1.key, src1.cnt1, src2.cnt1
-POSTHOOK: type: QUERY
-POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) val1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) val2)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count) cnt1)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) src2) (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src2) cnt1))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) cnt1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) cnt1)))))
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-2, Stage-7
- Stage-4 depends on stages: Stage-3
- Stage-6 is a root stage
- Stage-7 depends on stages: Stage-6
- Stage-0 is a root stage
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Alias -> Map Operator Tree:
- src1:subq1:b
- TableScan
- alias: b
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key}
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[key]]
- 1 [Column[key]]
- outputColumnNames: _col0
- Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- keys:
- expr: _col0
- type: int
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: int
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
- Reduce Operator Tree:
- Group By Operator
- aggregations:
- expr: count(VALUE._col0)
- bucketGroup: false
- keys:
- expr: KEY._col0
- type: int
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: bigint
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-3
- Map Reduce
- Alias -> Map Operator Tree:
- $INTNAME
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: int
- tag: 0
- value expressions:
- expr: _col0
- type: int
- expr: _col1
- type: bigint
- $INTNAME1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: int
- tag: 1
- value expressions:
- expr: _col1
- type: bigint
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {VALUE._col0} {VALUE._col1}
- 1 {VALUE._col1}
- handleSkewJoin: false
- outputColumnNames: _col0, _col1, _col3
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: bigint
- expr: _col3
- type: bigint
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-4
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- expr: _col1
- type: bigint
- expr: _col2
- type: bigint
- sort order: +++
- tag: -1
- value expressions:
- expr: _col0
- type: int
- expr: _col1
- type: bigint
- expr: _col2
- type: bigint
- Reduce Operator Tree:
- Extract
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-
- Stage: Stage-6
- Map Reduce
- Alias -> Map Operator Tree:
- src2:subq2:b
- TableScan
- alias: b
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key}
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[key]]
- 1 [Column[key]]
- outputColumnNames: _col0
- Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-7
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- keys:
- expr: _col0
- type: int
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: int
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
- Reduce Operator Tree:
- Group By Operator
- aggregations:
- expr: count(VALUE._col0)
- bucketGroup: false
- keys:
- expr: KEY._col0
- type: int
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: bigint
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
-
-
-PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq1 group by key
-) src1
-join
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq2 group by key
-) src2
-on src1.key = src2.key
-order by src1.key, src1.cnt1, src2.cnt1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@tbl1
-PREHOOK: Input: default@tbl2
-#### A masked pattern was here ####
-POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq1 group by key
-) src1
-join
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq2 group by key
-) src2
-on src1.key = src2.key
-order by src1.key, src1.cnt1, src2.cnt1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@tbl1
-POSTHOOK: Input: default@tbl2
-#### A masked pattern was here ####
-POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-0 9 9
-2 1 1
-4 1 1
-5 9 9
-8 1 1
-9 1 1
PREHOOK: query: -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should
-- be converted to a sort-merge join.
explain
@@ -919,7 +507,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -949,31 +536,20 @@
0 [Column[_col0]]
1 [Column[_col0]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1058,7 +634,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -1079,31 +654,20 @@
0 [Column[_col0]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1212,7 +776,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -1247,31 +810,20 @@
0 [Column[_col0]]
1 [Column[_col0]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1370,7 +922,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -1400,31 +951,20 @@
0 [Column[_col0]]
1 [Column[_col0]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1498,13 +1038,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL a) key) 1) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
subq1:a
@@ -1551,33 +1090,22 @@
0 [Column[_col0]]
1 [Column[_col0]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1648,7 +1176,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -1669,31 +1196,20 @@
0 [Column[_col0]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1760,7 +1276,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -1790,31 +1305,20 @@
0 [Column[_col0]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1891,7 +1395,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -1924,31 +1427,20 @@
1 [Column[_col0]]
2 [Column[_col0]]
Position of Big Table: 2
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -2041,7 +1533,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -2062,32 +1553,21 @@
0 [Column[_col0]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
Index: ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out
===================================================================
--- ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out (working copy)
@@ -71,7 +71,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -93,21 +92,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -160,47 +158,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -231,7 +188,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/join35.q.out
===================================================================
--- ql/src/test/results/clientpositive/join35.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/join35.q.out (working copy)
@@ -3,9 +3,11 @@
POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@dest_j1
-PREHOOK: query: EXPLAIN EXTENDED
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt
+SELECT x.key, x.value, subq1.cnt
FROM
( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key
UNION ALL
@@ -13,9 +15,11 @@
) subq1
JOIN src1 x ON (x.key = subq1.key)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN EXTENDED
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt
+SELECT x.key, x.value, subq1.cnt
FROM
( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key
UNION ALL
@@ -24,20 +28,19 @@
JOIN src1 x ON (x.key = subq1.key)
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) key))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) cnt)))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-11 depends on stages: Stage-1, Stage-9
- Stage-2 depends on stages: Stage-11
- Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-9, Stage-10, Stage-2
+ Stage-9 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-9
+ Stage-0 depends on stages: Stage-2, Stage-6, Stage-7
Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
- Stage-9 is a root stage
+ Stage-10 has a backup stage: Stage-2
+ Stage-7 depends on stages: Stage-10
+ Stage-2
+ Stage-4 is a root stage
STAGE PLANS:
Stage: Stage-1
@@ -159,7 +162,10 @@
Truncated Path -> Alias:
/src [null-subquery1:subq1-subquery1:x]
- Stage: Stage-11
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-9
Map Reduce Local Work
Alias -> Map Local Tables:
x
@@ -180,7 +186,7 @@
1 [Column[key]]
Position of Big Table: 0
- Stage: Stage-2
+ Stage: Stage-6
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
@@ -201,55 +207,46 @@
Position of Big Table: 0
Select Operator
expressions:
- expr: _col1
- type: bigint
expr: _col2
type: string
expr: _col3
type: string
- outputColumnNames: _col1, _col2, _col3
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
Select Operator
expressions:
- expr: _col2
+ expr: _col0
type: string
- expr: _col3
+ expr: _col1
type: string
- expr: _col1
- type: bigint
+ expr: UDFToInteger(_col2)
+ type: int
outputColumnNames: _col0, _col1, _col2
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- expr: UDFToInteger(_col2)
- type: int
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
+ NumFilesPerFileSink: 1
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:int
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value,val2
+ columns.types string:string:int
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, i32 val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name default.dest_j1
+ serialization.ddl struct dest_j1 { string key, string value, i32 val2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
#### A masked pattern was here ####
TableScan
GatherStats: false
@@ -268,58 +265,49 @@
Position of Big Table: 0
Select Operator
expressions:
- expr: _col1
- type: bigint
expr: _col2
type: string
expr: _col3
type: string
- outputColumnNames: _col1, _col2, _col3
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
Select Operator
expressions:
- expr: _col2
+ expr: _col0
type: string
- expr: _col3
+ expr: _col1
type: string
- expr: _col1
- type: bigint
+ expr: UDFToInteger(_col2)
+ type: int
outputColumnNames: _col0, _col1, _col2
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- expr: UDFToInteger(_col2)
- type: int
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
+ NumFilesPerFileSink: 1
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:int
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value,val2
+ columns.types string:string:int
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, i32 val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name default.dest_j1
+ serialization.ddl struct dest_j1 { string key, string value, i32 val2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
- Needs Tagging: false
+ Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -341,7 +329,7 @@
escape.delim \
#### A masked pattern was here ####
Partition
- base file name: -mr-10004
+ base file name: -mr-10003
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
@@ -355,18 +343,51 @@
columns _col0,_col1
columns.types string,bigint
escape.delim \
+#### A masked pattern was here ####
+ Partition
+ base file name: src1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src1
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src1 { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src1
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src1 { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src1
+ name: default.src1
Truncated Path -> Alias:
#### A masked pattern was here ####
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-0
Move Operator
tables:
@@ -393,51 +414,161 @@
Stats-Aggr Operator
#### A masked pattern was here ####
- Stage: Stage-4
+ Stage: Stage-10
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+#### A masked pattern was here ####
+ Fetch Operator
+ limit: -1
+#### A masked pattern was here ####
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+#### A masked pattern was here ####
+ TableScan
+ GatherStats: false
+ Union
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col1}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+#### A masked pattern was here ####
+ TableScan
+ GatherStats: false
+ Union
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col1}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+
+ Stage: Stage-7
Map Reduce
Alias -> Map Operator Tree:
+ x
+ TableScan
+ alias: x
+ GatherStats: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col1}
+ 1 {key} {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ outputColumnNames: _col1, _col2, _col3
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: UDFToInteger(_col2)
+ type: int
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- File Output Operator
- compressed: false
- GlobalTableId: 0
+ NumFilesPerFileSink: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:int
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value,val2
+ columns.types string:string:int
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, i32 val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name default.dest_j1
+ serialization.ddl struct dest_j1 { string key, string value, i32 val2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Needs Tagging: false
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: -ext-10003
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string,bigint
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string,bigint
+ escape.delim \
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10003
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string,bigint
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string,bigint
+ escape.delim \
+#### A masked pattern was here ####
+ Partition
+ base file name: src1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
- columns key,value,val2
- columns.types string:string:int
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, i32 val2}
+ name default.src1
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -445,65 +576,133 @@
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
- columns key,value,val2
- columns.types string:string:int
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, i32 val2}
+ name default.src1
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- name: default.dest_j1
+ name: default.src1
+ name: default.src1
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /src1 [x]
- Stage: Stage-6
+ Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
- File Output Operator
- compressed: false
- GlobalTableId: 0
+ TableScan
+ GatherStats: false
+ Union
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col1
+ type: bigint
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:int
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, i32 val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Needs Tagging: false
+ TableScan
+ GatherStats: false
+ Union
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col1
+ type: bigint
+ x
+ TableScan
+ alias: x
+ GatherStats: false
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: -ext-10003
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string,bigint
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string,bigint
+ escape.delim \
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10003
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string,bigint
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string,bigint
+ escape.delim \
+#### A masked pattern was here ####
+ Partition
+ base file name: src1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
- columns key,value,val2
- columns.types string:string:int
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, i32 val2}
+ name default.src1
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -511,27 +710,78 @@
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
- columns key,value,val2
- columns.types string:string:int
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, i32 val2}
+ name default.src1
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- name: default.dest_j1
+ name: default.src1
+ name: default.src1
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: UDFToInteger(_col2)
+ type: int
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value,val2
+ columns.types string:string:int
+#### A masked pattern was here ####
+ name default.dest_j1
+ serialization.ddl struct dest_j1 { string key, string value, i32 val2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Truncated Path -> Alias:
+ /src1 [x]
#### A masked pattern was here ####
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-9
+ Stage: Stage-4
Map Reduce
Alias -> Map Operator Tree:
null-subquery2:subq1-subquery2:x1
@@ -652,7 +902,7 @@
PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt
+SELECT x.key, x.value, subq1.cnt
FROM
( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key
UNION ALL
@@ -664,7 +914,7 @@
PREHOOK: Input: default@src1
PREHOOK: Output: default@dest_j1
POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt
+SELECT x.key, x.value, subq1.cnt
FROM
( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key
UNION ALL
Index: ql/src/test/results/clientpositive/mapjoin_subquery2.q.out
===================================================================
--- ql/src/test/results/clientpositive/mapjoin_subquery2.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/mapjoin_subquery2.q.out (working copy)
@@ -49,64 +49,46 @@
POSTHOOK: query: load data local inpath '../data/files/z.txt' INTO TABLE z
POSTHOOK: type: LOAD
POSTHOOK: Output: default@z
-PREHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
+SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
FROM
(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2
FROM y JOIN x ON (x.id = y.id)) subq
JOIN z ON (subq.key1 = z.id)
PREHOOK: type: QUERY
-PREHOOK: Input: default@x
-PREHOOK: Input: default@y
-PREHOOK: Input: default@z
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
+SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
FROM
(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2
FROM y JOIN x ON (x.id = y.id)) subq
JOIN z ON (subq.key1 = z.id)
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@x
-POSTHOOK: Input: default@y
-POSTHOOK: Input: default@z
-#### A masked pattern was here ####
-2 Joe 2 Tie 2 Tie
-2 Hank 2 Tie 2 Tie
-PREHOOK: query: EXPLAIN
-SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
-FROM
-(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
- FROM y JOIN x ON (x.id = y.id)) subq
- JOIN z ON (subq.key1 = z.id)
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
-FROM
-(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
- FROM y JOIN x ON (x.id = y.id)) subq
- JOIN z ON (subq.key1 = z.id)
-POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME y)) (TOK_TABREF (TOK_TABNAME x)) (= (. (TOK_TABLE_OR_COL x) id) (. (TOK_TABLE_OR_COL y) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) id) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) name) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) id) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) name) value2)))) subq) (TOK_TABREF (TOK_TABNAME z)) (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) id)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) name)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME y)) (TOK_TABREF (TOK_TABNAME x)) (= (. (TOK_TABLE_OR_COL x) id) (. (TOK_TABLE_OR_COL y) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) id) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) name) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) id) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) name) value2)))) subq) (TOK_TABREF (TOK_TABNAME z)) (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) value2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) id)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) name)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
+ Stage-7 is a root stage
+ Stage-6 depends on stages: Stage-7
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
- subq:x
+ subq:y
Fetch Operator
limit: -1
z
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- subq:x
+ subq:y
TableScan
- alias: x
+ alias: y
HashTable Sink Operator
condition expressions:
0 {id} {name}
@@ -115,7 +97,7 @@
keys:
0 [Column[id]]
1 [Column[id]]
- Position of Big Table: 0
+ Position of Big Table: 1
z
TableScan
alias: z
@@ -129,12 +111,12 @@
1 [Column[id]]
Position of Big Table: 0
- Stage: Stage-1
+ Stage: Stage-6
Map Reduce
Alias -> Map Operator Tree:
- subq:y
+ subq:x
TableScan
- alias: y
+ alias: x
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -146,29 +128,18 @@
0 [Column[id]]
1 [Column[id]]
outputColumnNames: _col0, _col1, _col4, _col5
- Position of Big Table: 0
+ Position of Big Table: 1
Select Operator
expressions:
+ expr: _col5
+ type: int
+ expr: _col4
+ type: string
expr: _col0
type: int
expr: _col1
type: string
- expr: _col4
- type: string
- expr: _col5
- type: int
- outputColumnNames: _col0, _col1, _col4, _col5
- Select Operator
- expressions:
- expr: _col5
- type: int
- expr: _col4
- type: string
- expr: _col0
- type: int
- expr: _col1
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -196,27 +167,12 @@
expr: _col5
type: string
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col2
- type: int
- expr: _col3
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Local Work:
Map Reduce Local Work
@@ -225,9 +181,9 @@
limit: -1
-PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+PREHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
FROM
-(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
+(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2
FROM y JOIN x ON (x.id = y.id)) subq
JOIN z ON (subq.key1 = z.id)
PREHOOK: type: QUERY
@@ -235,9 +191,9 @@
PREHOOK: Input: default@y
PREHOOK: Input: default@z
#### A masked pattern was here ####
-POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+POSTHOOK: query: SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
FROM
-(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
+(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2
FROM y JOIN x ON (x.id = y.id)) subq
JOIN z ON (subq.key1 = z.id)
POSTHOOK: type: QUERY
Index: ql/src/test/results/clientpositive/join30.q.out
===================================================================
--- ql/src/test/results/clientpositive/join30.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/join30.q.out (working copy)
@@ -15,14 +15,13 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key))))
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-1 depends on stages: Stage-5
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-4
Map Reduce Local Work
Alias -> Map Local Tables:
x
@@ -60,50 +59,39 @@
1 [Column[key]]
outputColumnNames: _col0
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: false
- keys:
+ Select Operator
+ expressions:
expr: _col0
type: string
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
expr: _col0
type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Local Work:
+ Map Reduce Local Work
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -147,7 +135,7 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
- Stage: Stage-3
+ Stage: Stage-2
Stats-Aggr Operator
Index: ql/src/test/results/clientpositive/bucketcontext_4.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketcontext_4.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketcontext_4.q.out (working copy)
@@ -81,13 +81,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
a
@@ -133,21 +132,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -202,47 +200,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -273,7 +230,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
Stage: Stage-0
Fetch Operator
@@ -306,7 +263,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -328,21 +284,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -395,47 +350,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -466,7 +380,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/bucketmapjoin9.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketmapjoin9.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketmapjoin9.q.out (working copy)
@@ -70,13 +70,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -115,21 +114,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -183,47 +181,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -254,7 +211,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
Stage: Stage-0
Fetch Operator
@@ -335,13 +292,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -380,21 +336,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -448,47 +403,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -519,7 +433,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/bucketmapjoin13.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketmapjoin13.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketmapjoin13.q.out (working copy)
@@ -98,13 +98,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -143,21 +142,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -258,48 +256,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- /srcbucket_mapjoin_part_1/part=2 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -330,7 +286,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Stage: Stage-0
Fetch Operator
@@ -388,13 +345,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '2')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -440,21 +396,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -508,47 +463,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=2 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -579,7 +493,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=2 [a]
Stage: Stage-0
Fetch Operator
@@ -649,13 +563,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -701,21 +614,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -769,47 +681,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=2 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -840,7 +711,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=2 [a]
Stage: Stage-0
Fetch Operator
@@ -912,13 +783,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -964,21 +834,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -1032,47 +901,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=2 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1103,7 +931,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=2 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/join28.q.out
===================================================================
--- ql/src/test/results/clientpositive/join28.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/join28.q.out (working copy)
@@ -3,50 +3,49 @@
POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@dest_j1
-PREHOOK: query: EXPLAIN
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)))))
STAGE DEPENDENCIES:
- Stage-10 is a root stage
- Stage-1 depends on stages: Stage-10
- Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
- Stage-4
- Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
- Stage-2 depends on stages: Stage-0
- Stage-3
- Stage-5
- Stage-6 depends on stages: Stage-5
+ Stage-8 is a root stage
+ Stage-7 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-7
+ Stage-3 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-10
+ Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
- subq:x
+ subq:y
Fetch Operator
limit: -1
z
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- subq:x
+ subq:y
TableScan
- alias: x
+ alias: y
HashTable Sink Operator
condition expressions:
0 {key}
@@ -55,7 +54,7 @@
keys:
0 [Column[key]]
1 [Column[key]]
- Position of Big Table: 1
+ Position of Big Table: 0
z
TableScan
alias: z
@@ -69,12 +68,12 @@
1 [Column[key]]
Position of Big Table: 0
- Stage: Stage-1
+ Stage: Stage-7
Map Reduce
Alias -> Map Operator Tree:
- subq:y
+ subq:x
TableScan
- alias: y
+ alias: x
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -86,17 +85,12 @@
0 [Column[key]]
1 [Column[key]]
outputColumnNames: _col0
- Position of Big Table: 1
+ Position of Big Table: 0
Select Operator
expressions:
expr: _col0
type: string
outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -115,34 +109,18 @@
type: string
expr: _col5
type: string
- outputColumnNames: _col0, _col5
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- GlobalTableId: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
Local Work:
Map Reduce Local Work
- Stage: Stage-7
- Conditional Operator
-
- Stage: Stage-4
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
Stage: Stage-0
Move Operator
tables:
@@ -153,46 +131,14 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
- Stage: Stage-2
+ Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-3
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- Stage: Stage-5
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
-
- Stage: Stage-6
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
-
PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
PREHOOK: type: QUERY
@@ -202,9 +148,9 @@
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Output: default@dest_j1
POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
POSTHOOK: type: QUERY
Index: ql/src/test/results/clientpositive/smb_mapjoin_13.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (working copy)
@@ -77,7 +77,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -100,21 +99,43 @@
1 [Column[value]]
outputColumnNames: _col0, _col1, _col4, _col5
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col4,_col5
- columns.types int,string,int,string
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: int
+ expr: _col3
+ type: string
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -165,70 +186,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table1
name: default.test_table1
- Truncated Path -> Alias:
- /test_table1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col4, _col5
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- tag: -1
- value expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col2
- type: int
- expr: _col3
- type: string
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col4,_col5
- columns.types int,string,int,string
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col4,_col5
- columns.types int,string,int,string
- escape.delim \
Reduce Operator Tree:
Extract
Limit
@@ -250,7 +207,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /test_table1 [a]
Stage: Stage-0
Fetch Operator
@@ -307,13 +264,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table3) a) (TOK_TABREF (TOK_TABNAME test_table4) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10)))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -353,21 +309,43 @@
1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[value]()]
outputColumnNames: _col0, _col1, _col4, _col5
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col4,_col5
- columns.types int,string,int,string
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: int
+ expr: _col3
+ type: string
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -420,70 +398,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
name: default.test_table3
- Truncated Path -> Alias:
- /test_table3 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col4, _col5
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- tag: -1
- value expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col2
- type: int
- expr: _col3
- type: string
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col4,_col5
- columns.types int,string,int,string
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col4,_col5
- columns.types int,string,int,string
- escape.delim \
Reduce Operator Tree:
Extract
Limit
@@ -505,7 +419,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /test_table3 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out
===================================================================
--- ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out (working copy)
@@ -64,13 +64,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table_desc1) a) (TOK_TABREF (TOK_TABNAME table_desc2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 10))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -116,37 +115,26 @@
1 [Column[key], Column[value]]
outputColumnNames: _col0
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
Index: ql/src/test/results/clientpositive/join34.q.out
===================================================================
--- ql/src/test/results/clientpositive/join34.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/join34.q.out (working copy)
@@ -3,9 +3,11 @@
POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@dest_j1
-PREHOOK: query: EXPLAIN EXTENDED
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value
+SELECT x.key, x.value, subq1.value
FROM
( SELECT x.key as key, x.value as value from src x where x.key < 20
UNION ALL
@@ -13,9 +15,11 @@
) subq1
JOIN src1 x ON (x.key = subq1.key)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN EXTENDED
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value
+SELECT x.key, x.value, subq1.value
FROM
( SELECT x.key as key, x.value as value from src x where x.key < 20
UNION ALL
@@ -24,21 +28,16 @@
JOIN src1 x ON (x.key = subq1.key)
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) value) value)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) value) value)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100))))) subq1) (TOK_TABREF (TOK_TABNAME src1) x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value)))))
STAGE DEPENDENCIES:
- Stage-10 is a root stage
- Stage-1 depends on stages: Stage-10
- Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
- Stage-4
- Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
- Stage-3
- Stage-5
- Stage-6 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-10
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
x
@@ -59,7 +58,7 @@
1 [Column[key]]
Position of Big Table: 0
- Stage: Stage-1
+ Stage: Stage-5
Map Reduce
Alias -> Map Operator Tree:
null-subquery1:subq1-subquery1:x
@@ -93,46 +92,37 @@
Position of Big Table: 0
Select Operator
expressions:
- expr: _col1
- type: string
expr: _col2
type: string
expr: _col3
type: string
- outputColumnNames: _col1, _col2, _col3
- Select Operator
- expressions:
- expr: _col2
- type: string
- expr: _col3
- type: string
- expr: _col1
- type: string
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 1
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
+ NumFilesPerFileSink: 1
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value,val2
+ columns.types string:string:string
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name default.dest_j1
+ serialization.ddl struct dest_j1 { string key, string value, string val2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
null-subquery2:subq1-subquery2:x1
TableScan
alias: x1
@@ -164,49 +154,40 @@
Position of Big Table: 0
Select Operator
expressions:
- expr: _col1
- type: string
expr: _col2
type: string
expr: _col3
type: string
- outputColumnNames: _col1, _col2, _col3
- Select Operator
- expressions:
- expr: _col2
- type: string
- expr: _col3
- type: string
- expr: _col1
- type: string
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 1
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
+ NumFilesPerFileSink: 1
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value,val2
+ columns.types string:string:string
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name default.dest_j1
+ serialization.ddl struct dest_j1 { string key, string value, string val2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
- Needs Tagging: false
+ Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -252,89 +233,25 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src
name: default.src
- Truncated Path -> Alias:
- /src [null-subquery1:subq1-subquery1:x, null-subquery2:subq1-subquery2:x1]
-
- Stage: Stage-7
- Conditional Operator
-
- Stage: Stage-4
- Move Operator
- files:
- hdfs directory: true
#### A masked pattern was here ####
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
-#### A masked pattern was here ####
-
- Stage: Stage-2
- Stats-Aggr Operator
-#### A masked pattern was here ####
-
- Stage: Stage-3
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
Partition
- base file name: -ext-10002
+ base file name: src1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
- columns key,value,val2
- columns.types string:string:string
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
+ name default.src1
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -342,68 +259,31 @@
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
- columns key,value,val2
- columns.types string:string:string
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
+ name default.src1
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- name: default.dest_j1
+ name: default.src1
+ name: default.src1
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /src [null-subquery1:subq1-subquery1:x, null-subquery2:subq1-subquery2:x1]
- Stage: Stage-5
- Map Reduce
- Alias -> Map Operator Tree:
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
#### A masked pattern was here ####
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -ext-10002
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
+ table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
@@ -418,19 +298,15 @@
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
- name: default.dest_j1
- Truncated Path -> Alias:
#### A masked pattern was here ####
- Stage: Stage-6
- Move Operator
- files:
- hdfs directory: true
+ Stage: Stage-2
+ Stats-Aggr Operator
#### A masked pattern was here ####
PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value
+SELECT x.key, x.value, subq1.value
FROM
( SELECT x.key as key, x.value as value from src x where x.key < 20
UNION ALL
@@ -442,7 +318,7 @@
PREHOOK: Input: default@src1
PREHOOK: Output: default@dest_j1
POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value
+SELECT x.key, x.value, subq1.value
FROM
( SELECT x.key as key, x.value as value from src x where x.key < 20
UNION ALL
Index: ql/src/test/results/clientpositive/skewjoin.q.out
===================================================================
--- ql/src/test/results/clientpositive/skewjoin.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/skewjoin.q.out (working copy)
@@ -1524,13 +1524,12 @@
(TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME T1) k) (TOK_TABREF (TOK_TABNAME T1) v) (= (+ (. (TOK_TABLE_OR_COL k) key) 1) (. (TOK_TABLE_OR_COL v) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST v))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL k) key)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL v) val)))))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
v
@@ -1568,48 +1567,37 @@
1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
outputColumnNames: _col0, _col5
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col5
+ Group By Operator
+ aggregations:
+ expr: sum(hash(_col0))
+ expr: sum(hash(_col5))
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ expr: _col1
+ type: bigint
Local Work:
Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col5
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col5
- Group By Operator
- aggregations:
- expr: sum(hash(_col0))
- expr: sum(hash(_col5))
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- expr: _col1
- type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
Index: ql/src/test/results/clientpositive/bucketcontext_8.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketcontext_8.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketcontext_8.q.out (working copy)
@@ -94,13 +94,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
a
@@ -146,21 +145,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -263,48 +261,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -335,7 +291,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Stage: Stage-0
Fetch Operator
@@ -370,7 +327,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -392,21 +348,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -507,48 +462,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -579,7 +492,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/bucketcontext_3.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketcontext_3.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketcontext_3.q.out (working copy)
@@ -69,13 +69,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
a
@@ -121,21 +120,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -190,47 +188,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -261,7 +218,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
Stage: Stage-0
Fetch Operator
@@ -294,7 +251,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -316,21 +272,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -383,47 +338,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -454,7 +368,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/bucketmapjoin8.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketmapjoin8.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketmapjoin8.q.out (working copy)
@@ -64,13 +64,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -116,21 +115,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -184,47 +182,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -255,7 +212,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
Stage: Stage-0
Fetch Operator
@@ -307,13 +264,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -359,21 +315,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -427,47 +382,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -498,7 +412,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/bucketmapjoin12.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketmapjoin12.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketmapjoin12.q.out (working copy)
@@ -92,13 +92,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -144,21 +143,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -212,47 +210,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -283,7 +240,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
Stage: Stage-0
Fetch Operator
@@ -327,13 +284,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_3) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -372,21 +328,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -440,47 +395,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -511,7 +425,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/multiMapJoin1.q.out
===================================================================
--- ql/src/test/results/clientpositive/multiMapJoin1.q.out (revision 0)
+++ ql/src/test/results/clientpositive/multiMapJoin1.q.out (working copy)
@@ -0,0 +1,706 @@
+PREHOOK: query: -- Join of a big table with 2 small tables on different keys should be performed as a single MR job
+create table smallTbl1(key string, value string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Join of a big table with 2 small tables on different keys should be performed as a single MR job
+create table smallTbl1(key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@smallTbl1
+PREHOOK: query: insert overwrite table smallTbl1 select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@smalltbl1
+POSTHOOK: query: insert overwrite table smallTbl1 select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@smalltbl1
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create table smallTbl2(key string, value string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table smallTbl2(key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@smallTbl2
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table smallTbl2 select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@smalltbl2
+POSTHOOK: query: insert overwrite table smallTbl2 select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@smalltbl2
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create table bigTbl(key string, value string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table bigTbl(key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@bigTbl
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table bigTbl
+select * from
+(
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+) subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@bigtbl
+POSTHOOK: query: insert overwrite table bigTbl
+select * from
+(
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+) subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@bigtbl
+POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-10 is a root stage , consists of Stage-13, Stage-14, Stage-3
+ Stage-13 has a backup stage: Stage-3
+ Stage-8 depends on stages: Stage-13
+ Stage-7 depends on stages: Stage-3, Stage-8, Stage-9 , consists of Stage-11, Stage-12, Stage-1
+ Stage-11 has a backup stage: Stage-1
+ Stage-5 depends on stages: Stage-11
+ Stage-2 depends on stages: Stage-1, Stage-5, Stage-6
+ Stage-12 has a backup stage: Stage-1
+ Stage-6 depends on stages: Stage-12
+ Stage-1
+ Stage-14 has a backup stage: Stage-3
+ Stage-9 depends on stages: Stage-14
+ Stage-3
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-10
+ Conditional Operator
+
+ Stage: Stage-13
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ firstjoin:smalltbl1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ firstjoin:smalltbl1
+ TableScan
+ alias: smalltbl1
+ HashTable Sink Operator
+ condition expressions:
+ 0 {value}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+
+ Stage: Stage-8
+ Map Reduce
+ Alias -> Map Operator Tree:
+ firstjoin:bigtbl
+ TableScan
+ alias: bigtbl
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {value}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col1
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ outputColumnNames: _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-11
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ smalltbl2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ smalltbl2
+ TableScan
+ alias: smalltbl2
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[value]]
+ Position of Big Table: 0
+
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[value]]
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-12
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[value]]
+ Position of Big Table: 1
+
+ Stage: Stage-6
+ Map Reduce
+ Alias -> Map Operator Tree:
+ smalltbl2
+ TableScan
+ alias: smalltbl2
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[value]]
+ Position of Big Table: 1
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col1
+ type: string
+ tag: 0
+ smalltbl2
+ TableScan
+ alias: smalltbl2
+ Reduce Output Operator
+ key expressions:
+ expr: value
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: value
+ type: string
+ tag: 1
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-14
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ firstjoin:bigtbl
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ firstjoin:bigtbl
+ TableScan
+ alias: bigtbl
+ HashTable Sink Operator
+ condition expressions:
+ 0 {value}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+ firstjoin:smalltbl1
+ TableScan
+ alias: smalltbl1
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {value}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col1
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ outputColumnNames: _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ firstjoin:bigtbl
+ TableScan
+ alias: bigtbl
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 0
+ value expressions:
+ expr: value
+ type: string
+ firstjoin:smalltbl1
+ TableScan
+ alias: smalltbl1
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: 1
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col1}
+ 1
+ handleSkewJoin: false
+ outputColumnNames: _col1
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ outputColumnNames: _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bigtbl
+PREHOOK: Input: default@smalltbl1
+PREHOOK: Input: default@smalltbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bigtbl
+POSTHOOK: Input: default@smalltbl1
+POSTHOOK: Input: default@smalltbl2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+580
+PREHOOK: query: explain
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-7 is a root stage
+ Stage-6 depends on stages: Stage-7
+ Stage-2 depends on stages: Stage-6
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-7
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ firstjoin:smalltbl1
+ Fetch Operator
+ limit: -1
+ smalltbl2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ firstjoin:smalltbl1
+ TableScan
+ alias: smalltbl1
+ HashTable Sink Operator
+ condition expressions:
+ 0 {value}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ smalltbl2
+ TableScan
+ alias: smalltbl2
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[value]]
+ Position of Big Table: 0
+
+ Stage: Stage-6
+ Map Reduce
+ Alias -> Map Operator Tree:
+ firstjoin:bigtbl
+ TableScan
+ alias: bigtbl
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {value}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col1
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ outputColumnNames: _col1
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[value]]
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bigtbl
+PREHOOK: Input: default@smalltbl1
+PREHOOK: Input: default@smalltbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bigtbl
+POSTHOOK: Input: default@smalltbl1
+POSTHOOK: Input: default@smalltbl2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+580
Index: ql/src/test/results/clientpositive/bucket_map_join_2.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucket_map_join_2.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucket_map_join_2.q.out (working copy)
@@ -50,13 +50,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -95,21 +94,20 @@
0 [Column[key], Column[value]]
1 [Column[key], Column[value]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -162,47 +160,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.table1
name: default.table1
- Truncated Path -> Alias:
- /table1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -233,7 +190,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /table1 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out
===================================================================
--- ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out (working copy)
@@ -67,7 +67,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -93,35 +92,24 @@
1 [Column[key], Column[value]]
outputColumnNames: _col0
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
+ Select Operator
+ expressions:
expr: _col0
- type: bigint
+ type: string
+ outputColumnNames: _col0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
Index: ql/src/test/results/clientpositive/join38.q.out
===================================================================
--- ql/src/test/results/clientpositive/join38.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/join38.q.out (working copy)
@@ -73,13 +73,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME tmp) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) col11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) col5)) (TOK_SELEXPR (TOK_FUNCTION count 1) count)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) col11) 111)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) col5))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
a
@@ -125,62 +124,51 @@
1 [Column[col11]]
outputColumnNames: _col1, _col9, _col15
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col1
- type: string
- expr: _col9
- type: string
- expr: _col15
- type: string
- outputColumnNames: _col1, _col9, _col15
- Select Operator
- expressions:
- expr: _col1
- type: string
- expr: _col9
- type: string
- outputColumnNames: _col1, _col9
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: false
- keys:
- expr: _col1
- type: string
- expr: _col9
- type: string
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
+ Select Operator
+ expressions:
expr: _col1
type: string
- sort order: ++
- Map-reduce partition columns:
- expr: _col0
+ expr: _col9
type: string
- expr: _col1
+ expr: _col15
type: string
- tag: -1
- value expressions:
- expr: _col2
- type: bigint
+ outputColumnNames: _col1, _col9, _col15
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ expr: _col9
+ type: string
+ outputColumnNames: _col1, _col9
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: _col1
+ type: string
+ expr: _col9
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Local Work:
+ Map Reduce Local Work
Reduce Operator Tree:
Group By Operator
aggregations:
Index: ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out
===================================================================
--- ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out (working copy)
@@ -56,13 +56,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src1)) (TOK_TABREF (TOK_TABNAME src1) src2) (AND (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)) (< (. (TOK_TABLE_OR_COL src1) key) 10)) (> (. (TOK_TABLE_OR_COL src2) key) 10))) (TOK_TABREF (TOK_TABNAME src) src3) (AND (= (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src3) key)) (< (. (TOK_TABLE_OR_COL src3) key) 300)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src1 src2))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) key)))))
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-1 depends on stages: Stage-5
- Stage-2 depends on stages: Stage-1
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-4
Map Reduce Local Work
Alias -> Map Local Tables:
src1
@@ -146,72 +145,61 @@
2 [Column[key]]
outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
Position of Big Table: 2
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ expr: _col8
+ type: string
+ expr: _col9
+ type: string
+ outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ expr: _col8
+ type: string
+ expr: _col9
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: string
+ expr: _col4
+ type: string
+ sort order: +++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
Local Work:
Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- expr: _col4
- type: string
- expr: _col5
- type: string
- expr: _col8
- type: string
- expr: _col9
- type: string
- outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- expr: _col4
- type: string
- expr: _col5
- type: string
- expr: _col8
- type: string
- expr: _col9
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- expr: _col2
- type: string
- expr: _col4
- type: string
- sort order: +++
- tag: -1
- value expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- expr: _col2
- type: string
- expr: _col3
- type: string
- expr: _col4
- type: string
- expr: _col5
- type: string
Reduce Operator Tree:
Extract
File Output Operator
Index: ql/src/test/results/clientpositive/join33.q.out
===================================================================
--- ql/src/test/results/clientpositive/join33.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/join33.q.out (working copy)
@@ -3,35 +3,41 @@
POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@dest_j1
-PREHOOK: query: EXPLAIN EXTENDED
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN EXTENDED
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value)))))
STAGE DEPENDENCIES:
- Stage-6 is a root stage
- Stage-3 depends on stages: Stage-6
- Stage-1 depends on stages: Stage-3
- Stage-0 depends on stages: Stage-1
+ Stage-7 is a root stage
+ Stage-6 depends on stages: Stage-7
+ Stage-0 depends on stages: Stage-6
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-6
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
x
Fetch Operator
limit: -1
+ z
+ Fetch Operator
+ limit: -1
Alias -> Map Local Operator Tree:
x
TableScan
@@ -46,8 +52,21 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
+ z
+ TableScan
+ alias: z
+ GatherStats: false
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col5} {_col0}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[value]]
+ Position of Big Table: 0
- Stage: Stage-3
+ Stage: Stage-6
Map Reduce
Alias -> Map Operator Tree:
y
@@ -66,24 +85,54 @@
1 [Column[key]]
outputColumnNames: _col0, _col1, _col5
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col5} {_col0}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[value]]
+ outputColumnNames: _col1, _col4, _col9
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col4
+ type: string
+ expr: _col9
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col5
- columns.types string,string,string
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value,val2
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.dest_j1
+ serialization.ddl struct dest_j1 { string key, string value, string val2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
- Needs Tagging: false
+ Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -129,94 +178,25 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src
name: default.src
- Truncated Path -> Alias:
- /src [y]
-
- Stage: Stage-1
- Map Reduce
- Alias -> Map Operator Tree:
#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col5
- Reduce Output Operator
- key expressions:
- expr: _col1
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col1
- type: string
- tag: 0
- value expressions:
- expr: _col5
- type: string
- expr: _col0
- type: string
- z
- TableScan
- alias: z
- GatherStats: false
- Reduce Output Operator
- key expressions:
- expr: value
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: value
- type: string
- tag: 1
- value expressions:
- expr: value
- type: string
- Needs Tagging: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col5
- columns.types string,string,string
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col5
- columns.types string,string,string
- escape.delim \
-#### A masked pattern was here ####
- Partition
- base file name: hr=11
+ base file name: src1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
- hr 11
properties:
bucket_count -1
columns key,value
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
+ name default.src1
numFiles 1
+ numPartitions 0
numRows 0
- partition_columns ds/hr
rawDataSize 0
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -227,65 +207,21 @@
columns key,value
columns.types string:string
#### A masked pattern was here ####
- name default.srcpart
- numFiles 4
- numPartitions 4
+ name default.src1
+ numFiles 1
+ numPartitions 0
numRows 0
- partition_columns ds/hr
rawDataSize 0
- serialization.ddl struct srcpart { string key, string value}
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 23248
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcpart
- name: default.srcpart
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {VALUE._col1} {VALUE._col4}
- 1 {VALUE._col1}
- handleSkewJoin: false
- outputColumnNames: _col1, _col4, _col9
- Select Operator
- expressions:
- expr: _col4
- type: string
- expr: _col9
- type: string
- expr: _col1
- type: string
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ name: default.src1
+ name: default.src1
Truncated Path -> Alias:
- /srcpart/ds=2008-04-08/hr=11 [z]
-#### A masked pattern was here ####
+ /src [y]
Stage: Stage-0
Move Operator
@@ -315,7 +251,7 @@
PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
PREHOOK: type: QUERY
@@ -325,7 +261,7 @@
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Output: default@dest_j1
POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
POSTHOOK: type: QUERY
Index: ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
===================================================================
--- ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (working copy)
@@ -1,24 +1,29 @@
-PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key)
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key)
PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key)
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key)
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key)))))
STAGE DEPENDENCIES:
- Stage-6 is a root stage
- Stage-1 depends on stages: Stage-6
- Stage-5 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-5
+ Stage-7 is a root stage
+ Stage-6 depends on stages: Stage-7
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-6
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
src
Fetch Operator
limit: -1
+ src1
+ Fetch Operator
+ limit: -1
Alias -> Map Local Operator Tree:
src
TableScan
@@ -32,42 +37,7 @@
0 [Column[value]]
1 [Column[value]]
Position of Big Table: 0
-
- Stage: Stage-1
- Map Reduce
- Alias -> Map Operator Tree:
- srcpart
- TableScan
- alias: srcpart
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key}
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[value]]
- 1 [Column[value]]
- outputColumnNames: _col0
- Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-5
- Map Reduce Local Work
- Alias -> Map Local Tables:
src1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- src1
TableScan
alias: src1
HashTable Sink Operator
@@ -80,43 +50,47 @@
1 [Column[key]]
Position of Big Table: 0
- Stage: Stage-2
+ Stage: Stage-6
Map Reduce
Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
+ srcpart
+ TableScan
+ alias: srcpart
Map Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
- 0 {_col0}
+ 0 {key}
1
handleSkewJoin: false
keys:
- 0 [Column[_col0]]
- 1 [Column[key]]
+ 0 [Column[value]]
+ 1 [Column[value]]
outputColumnNames: _col0
Position of Big Table: 0
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: string
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
outputColumnNames: _col0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Local Work:
Map Reduce Local Work
@@ -125,28 +99,29 @@
limit: -1
-PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds
+PREHOOK: query: explain select count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds
PREHOOK: type: QUERY
-POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds
+POSTHOOK: query: explain select count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart)) (TOK_TABREF (TOK_TABNAME src)) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF (TOK_TABNAME src1)) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds))))
STAGE DEPENDENCIES:
- Stage-7 is a root stage
- Stage-1 depends on stages: Stage-7
- Stage-6 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-6
- Stage-3 depends on stages: Stage-2
+ Stage-8 is a root stage
+ Stage-7 depends on stages: Stage-8
+ Stage-3 depends on stages: Stage-7
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-7
+ Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
src
Fetch Operator
limit: -1
+ src1
+ Fetch Operator
+ limit: -1
Alias -> Map Local Operator Tree:
src
TableScan
@@ -160,42 +135,7 @@
0 [Column[value]]
1 [Column[value]]
Position of Big Table: 0
-
- Stage: Stage-1
- Map Reduce
- Alias -> Map Operator Tree:
- srcpart
- TableScan
- alias: srcpart
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {ds}
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[value]]
- 1 [Column[value]]
- outputColumnNames: _col0, _col2
- Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-6
- Map Reduce Local Work
- Alias -> Map Local Tables:
src1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- src1
TableScan
alias: src1
HashTable Sink Operator
@@ -208,35 +148,56 @@
1 [Column[key]]
Position of Big Table: 0
- Stage: Stage-2
+ Stage: Stage-7
Map Reduce
Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col2
- type: string
- outputColumnNames: _col0, _col2
+ srcpart
+ TableScan
+ alias: srcpart
Map Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
- 0 {_col2}
+ 0 {key} {ds}
1
handleSkewJoin: false
keys:
- 0 [Column[_col0]]
- 1 [Column[key]]
- outputColumnNames: _col2
+ 0 [Column[value]]
+ 1 [Column[value]]
+ outputColumnNames: _col0, _col2
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col2}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ outputColumnNames: _col2
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col2
+ type: string
+ outputColumnNames: _col2
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col2
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Local Work:
Map Reduce Local Work
@@ -244,37 +205,18 @@
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col2
- type: string
- outputColumnNames: _col2
- Select Operator
- expressions:
- expr: _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: string
- outputColumnNames: _col2
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- keys:
- expr: _col2
- type: string
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -302,7 +244,7 @@
limit: -1
-PREHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds
+PREHOOK: query: select count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@srcpart
@@ -311,7 +253,7 @@
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
-POSTHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds
+POSTHOOK: query: select count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@srcpart
Index: ql/src/test/results/clientpositive/bucketcontext_7.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketcontext_7.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketcontext_7.q.out (working copy)
@@ -94,13 +94,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
a
@@ -146,21 +145,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -263,48 +261,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -335,7 +291,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Stage: Stage-0
Fetch Operator
@@ -370,7 +327,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -392,21 +348,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -507,48 +462,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -579,7 +492,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/bucketcontext_2.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketcontext_2.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketcontext_2.q.out (working copy)
@@ -69,13 +69,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
a
@@ -121,21 +120,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -238,48 +236,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -310,7 +266,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Stage: Stage-0
Fetch Operator
@@ -343,7 +300,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -365,21 +321,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -480,48 +435,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -552,7 +465,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/bucketmapjoin11.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketmapjoin11.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketmapjoin11.q.out (working copy)
@@ -124,13 +124,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -176,21 +175,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -291,48 +289,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- /srcbucket_mapjoin_part_1/part=2 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -363,7 +319,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Stage: Stage-0
Fetch Operator
@@ -407,13 +364,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) (. (TOK_TABLE_OR_COL b) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -459,21 +415,20 @@
0 [Column[key], Column[part]]
1 [Column[key], Column[part]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -574,48 +529,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- /srcbucket_mapjoin_part_1/part=2 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -646,7 +559,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/smb_mapjoin_16.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_16.q.out (revision 0)
+++ ql/src/test/results/clientpositive/smb_mapjoin_16.q.out (working copy)
@@ -0,0 +1,120 @@
+PREHOOK: query: -- Create bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Create bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table1
+PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_table2
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *
+INSERT OVERWRITE TABLE test_table2 SELECT *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_table1
+PREHOOK: Output: default@test_table2
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *
+INSERT OVERWRITE TABLE test_table2 SELECT *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_table1
+POSTHOOK: Output: default@test_table2
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Mapjoin followed by an aggregation should be performed in a single MR job
+EXPLAIN
+SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Mapjoin followed by an aggregation should be performed in a single MR job
+EXPLAIN
+SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_table1
+PREHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_table1
+POSTHOOK: Input: default@test_table2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+1028
Index: ql/src/test/results/clientpositive/mapjoin_subquery.q.out
===================================================================
--- ql/src/test/results/clientpositive/mapjoin_subquery.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/mapjoin_subquery.q.out (working copy)
@@ -1,39 +1,43 @@
-PREHOOK: query: EXPLAIN
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
+ Stage-7 is a root stage
+ Stage-6 depends on stages: Stage-7
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
- subq:x
+ subq:y
Fetch Operator
limit: -1
z
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- subq:x
+ subq:y
TableScan
- alias: x
+ alias: y
HashTable Sink Operator
condition expressions:
0 {key}
@@ -42,7 +46,7 @@
keys:
0 [Column[key]]
1 [Column[key]]
- Position of Big Table: 1
+ Position of Big Table: 0
z
TableScan
alias: z
@@ -56,12 +60,12 @@
1 [Column[key]]
Position of Big Table: 0
- Stage: Stage-1
+ Stage: Stage-6
Map Reduce
Alias -> Map Operator Tree:
- subq:y
+ subq:x
TableScan
- alias: y
+ alias: x
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -73,17 +77,12 @@
0 [Column[key]]
1 [Column[key]]
outputColumnNames: _col0
- Position of Big Table: 1
+ Position of Big Table: 0
Select Operator
expressions:
expr: _col0
type: string
outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -102,20 +101,13 @@
type: string
expr: _col5
type: string
- outputColumnNames: _col0, _col5
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Local Work:
Map Reduce Local Work
@@ -124,93 +116,91 @@
limit: -1
-PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+PREHOOK: query: SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
+ORDER BY subq.key1, z.value
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
-POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+POSTHOOK: query: SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
+ORDER BY subq.key1, z.value
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
-238 val_238
-238 val_238
-311 val_311
-311 val_311
-311 val_311
-255 val_255
-255 val_255
-278 val_278
-278 val_278
-98 val_98
-98 val_98
-401 val_401
-401 val_401
-401 val_401
-401 val_401
-401 val_401
-150 val_150
-273 val_273
-273 val_273
-273 val_273
-224 val_224
-224 val_224
-369 val_369
-369 val_369
-369 val_369
-66 val_66
128 val_128
128 val_128
128 val_128
-213 val_213
-213 val_213
-146 val_146
-146 val_146
-406 val_406
-406 val_406
-406 val_406
-406 val_406
128 val_128
128 val_128
128 val_128
-311 val_311
-311 val_311
-311 val_311
+128 val_128
+128 val_128
+128 val_128
+146 val_146
+146 val_146
+146 val_146
+146 val_146
+150 val_150
213 val_213
213 val_213
+213 val_213
+213 val_213
+224 val_224
+224 val_224
+224 val_224
+224 val_224
+238 val_238
+238 val_238
+238 val_238
+238 val_238
+255 val_255
+255 val_255
+255 val_255
+255 val_255
+273 val_273
+273 val_273
+273 val_273
+273 val_273
+273 val_273
+273 val_273
+273 val_273
+273 val_273
+273 val_273
278 val_278
278 val_278
+278 val_278
+278 val_278
311 val_311
311 val_311
311 val_311
-98 val_98
-98 val_98
+311 val_311
+311 val_311
+311 val_311
+311 val_311
+311 val_311
+311 val_311
369 val_369
369 val_369
369 val_369
-238 val_238
-238 val_238
-273 val_273
-273 val_273
-273 val_273
-224 val_224
-224 val_224
369 val_369
369 val_369
369 val_369
+369 val_369
+369 val_369
+369 val_369
401 val_401
401 val_401
401 val_401
@@ -221,9 +211,21 @@
401 val_401
401 val_401
401 val_401
-128 val_128
-128 val_128
-128 val_128
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
406 val_406
406 val_406
406 val_406
@@ -232,66 +234,58 @@
406 val_406
406 val_406
406 val_406
-401 val_401
-401 val_401
-401 val_401
-401 val_401
-401 val_401
-255 val_255
-255 val_255
406 val_406
406 val_406
406 val_406
406 val_406
-401 val_401
-401 val_401
-401 val_401
-401 val_401
-401 val_401
-146 val_146
-146 val_146
-273 val_273
-273 val_273
-273 val_273
+406 val_406
+406 val_406
+406 val_406
+406 val_406
+66 val_66
+98 val_98
+98 val_98
+98 val_98
+98 val_98
PREHOOK: query: EXPLAIN
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
- order by subq.key1
+ order by subq.key1, z.value
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
- order by subq.key1
+ order by subq.key1, z.value
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq) key1)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL subq) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL z) value)))))
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-1 depends on stages: Stage-5
- Stage-2 depends on stages: Stage-1
+ Stage-8 is a root stage
+ Stage-7 depends on stages: Stage-8
+ Stage-3 depends on stages: Stage-7
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
- subq:x
+ subq:y
Fetch Operator
limit: -1
z
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- subq:x
+ subq:y
TableScan
- alias: x
+ alias: y
HashTable Sink Operator
condition expressions:
0 {key}
@@ -300,7 +294,7 @@
keys:
0 [Column[key]]
1 [Column[key]]
- Position of Big Table: 1
+ Position of Big Table: 0
z
TableScan
alias: z
@@ -314,12 +308,12 @@
1 [Column[key]]
Position of Big Table: 0
- Stage: Stage-1
+ Stage: Stage-7
Map Reduce
Alias -> Map Operator Tree:
- subq:y
+ subq:x
TableScan
- alias: y
+ alias: x
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -331,17 +325,12 @@
0 [Column[key]]
1 [Column[key]]
outputColumnNames: _col0
- Position of Big Table: 1
+ Position of Big Table: 0
Select Operator
expressions:
expr: _col0
type: string
outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -354,44 +343,39 @@
1 [Column[key]]
outputColumnNames: _col0, _col5
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Local Work:
Map Reduce Local Work
- Stage: Stage-2
+ Stage: Stage-3
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col5
- Select Operator
- expressions:
+ Reduce Output Operator
+ key expressions:
expr: _col0
type: string
- expr: _col5
+ expr: _col1
type: string
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- tag: -1
- value expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
+ sort order: ++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
Reduce Operator Tree:
Extract
File Output Operator
@@ -406,24 +390,24 @@
limit: -1
-PREHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+PREHOOK: query: SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
- order by subq.key1
+ order by subq.key1, z.value
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
-POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+POSTHOOK: query: SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
- order by subq.key1
+ order by subq.key1, z.value
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
Index: ql/src/test/results/clientpositive/bucket_map_join_1.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucket_map_join_1.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucket_map_join_1.q.out (working copy)
@@ -50,13 +50,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -95,21 +94,20 @@
0 [Column[key], Column[value]]
1 [Column[key], Column[value]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -162,47 +160,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.table1
name: default.table1
- Truncated Path -> Alias:
- /table1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -233,7 +190,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /table1 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out
===================================================================
--- ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out (working copy)
@@ -134,13 +134,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -179,21 +178,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -294,48 +292,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- /srcbucket_mapjoin_part_1/part=2 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -366,7 +322,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out
===================================================================
--- ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out (working copy)
@@ -67,7 +67,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -93,35 +92,24 @@
1 [Column[key], Column[value]]
outputColumnNames: _col0
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
+ Select Operator
+ expressions:
expr: _col0
- type: bigint
+ type: string
+ outputColumnNames: _col0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
Index: ql/src/test/results/clientpositive/union22.q.out
===================================================================
--- ql/src/test/results/clientpositive/union22.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/union22.q.out (working copy)
@@ -42,26 +42,30 @@
POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: explain extended
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+explain extended
insert overwrite table dst_union22 partition (ds='2')
select * from
(
select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50
union all
-select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
+select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on
a.k1 = b.k1 and a.ds='1'
where a.k1 > 20
)
subq
PREHOOK: type: QUERY
-POSTHOOK: query: explain extended
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+explain extended
insert overwrite table dst_union22 partition (ds='2')
select * from
(
select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50
union all
-select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
+select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on
a.k1 = b.k1 and a.ds='1'
where a.k1 > 20
@@ -79,18 +83,22 @@
POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k1) k1) (TOK_SELEXPR (TOK_TABLE_OR_COL k2) k2) (TOK_SELEXPR (TOK_TABLE_OR_COL k3) k3) (TOK_SELEXPR (TOK_TABLE_OR_COL k4) k4)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (<= (TOK_TABLE_OR_COL k0) 50))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME dst_union22) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (> (TOK_TABLE_OR_COL k0) 50))))) b) (and (= (. (TOK_TABLE_OR_COL a) k1) (. (TOK_TABLE_OR_COL b) k1)) (= (. (TOK_TABLE_OR_COL a) ds) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k1) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k2) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3) k3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k4) k4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL a) k1) 20))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dst_union22) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k1) k1) (TOK_SELEXPR (TOK_TABLE_OR_COL k2) k2) (TOK_SELEXPR (TOK_TABLE_OR_COL k3) k3) (TOK_SELEXPR (TOK_TABLE_OR_COL k4) k4)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (<= (TOK_TABLE_OR_COL k0) 50))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME dst_union22) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME dst_union22_delta))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (> (TOK_TABLE_OR_COL k0) 50))))) b) (and (= (. (TOK_TABLE_OR_COL a) k1) (. (TOK_TABLE_OR_COL b) k1)) (= (. (TOK_TABLE_OR_COL a) ds) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k1) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k2) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3) k3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k4) k4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL a) k1) 20))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dst_union22) (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
STAGE DEPENDENCIES:
- Stage-7 is a root stage
- Stage-1 depends on stages: Stage-7
- Stage-2 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-3
- Stage-4 depends on stages: Stage-0
+ Stage-7 is a root stage , consists of Stage-8, Stage-1
+ Stage-8 has a backup stage: Stage-1
+ Stage-6 depends on stages: Stage-8
+ Stage-2 depends on stages: Stage-1, Stage-6
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1
STAGE PLANS:
Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
null-subquery2:subq-subquery2:b:dst_union22_delta
@@ -130,7 +138,7 @@
1 [Column[_col1]]
Position of Big Table: 0
- Stage: Stage-1
+ Stage: Stage-6
Map Reduce
Alias -> Map Operator Tree:
null-subquery2:subq-subquery2:a
@@ -159,24 +167,35 @@
1 [Column[_col1]]
outputColumnNames: _col0, _col1, _col10, _col11
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col10
+ type: string
+ expr: _col11
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col10,_col11
- columns.types string,string,string,string
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string,string,string,string
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
- Needs Tagging: false
+ Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -225,74 +244,56 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dst_union22
name: default.dst_union22
- Truncated Path -> Alias:
- /dst_union22/ds=1 [null-subquery2:subq-subquery2:a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- expr: _col10
- type: string
- expr: _col11
- type: string
- outputColumnNames: _col0, _col1, _col10, _col11
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- expr: _col10
- type: string
- expr: _col11
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3
- File Output Operator
- compressed: false
- GlobalTableId: 0
+ Partition
+ base file name: ds=1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ properties:
+ bucket_count -1
+ columns k0,k1,k2,k3,k4,k5
+ columns.types string:string:string:string:string:string
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3
- columns.types string,string,string,string
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Needs Tagging: false
- Path -> Alias:
+ name default.dst_union22_delta
+ numFiles 1
+ numPartitions 1
+ numRows 500
+ partition_columns ds
+ rawDataSize 16936
+ serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 17436
#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col10,_col11
- columns.types string,string,string,string
- escape.delim \
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1,_col10,_col11
- columns.types string,string,string,string
- escape.delim \
+ bucket_count -1
+ columns k0,k1,k2,k3,k4,k5
+ columns.types string:string:string:string:string:string
+#### A masked pattern was here ####
+ name default.dst_union22_delta
+ numFiles 1
+ numPartitions 1
+ numRows 500
+ partition_columns ds
+ rawDataSize 16936
+ serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 17436
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dst_union22_delta
+ name: default.dst_union22_delta
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /dst_union22/ds=1 [null-subquery2:subq-subquery2:a]
- Stage: Stage-3
+ Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
@@ -410,7 +411,7 @@
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: -mr-10003
+ base file name: -mr-10002
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
@@ -503,17 +504,217 @@
name: default.dst_union22
#### A masked pattern was here ####
- Stage: Stage-4
+ Stage: Stage-3
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ null-subquery2:subq-subquery2:a
+ TableScan
+ alias: a
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: (k1 > 20.0)
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: k1
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: k1
+ type: string
+ tag: 0
+ value expressions:
+ expr: k1
+ type: string
+ expr: k2
+ type: string
+ expr: ds
+ type: string
+ null-subquery2:subq-subquery2:b:dst_union22_delta
+ TableScan
+ alias: dst_union22_delta
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: ((k0 > 50.0) and (k1 > 20.0))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: k1
+ type: string
+ expr: k3
+ type: string
+ expr: k4
+ type: string
+ outputColumnNames: _col1, _col3, _col4
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col1
+ type: string
+ tag: 1
+ value expressions:
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ Needs Tagging: true
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ properties:
+ bucket_count -1
+ columns k1,k2,k3,k4
+ columns.types string:string:string:string
+#### A masked pattern was here ####
+ name default.dst_union22
+ numFiles 1
+ numPartitions 1
+ numRows 500
+ partition_columns ds
+ rawDataSize 11124
+ serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 11624
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns k1,k2,k3,k4
+ columns.types string:string:string:string
+#### A masked pattern was here ####
+ name default.dst_union22
+ numFiles 1
+ numPartitions 1
+ numRows 500
+ partition_columns ds
+ rawDataSize 11124
+ serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 11624
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dst_union22
+ name: default.dst_union22
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ properties:
+ bucket_count -1
+ columns k0,k1,k2,k3,k4,k5
+ columns.types string:string:string:string:string:string
+#### A masked pattern was here ####
+ name default.dst_union22_delta
+ numFiles 1
+ numPartitions 1
+ numRows 500
+ partition_columns ds
+ rawDataSize 16936
+ serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 17436
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns k0,k1,k2,k3,k4,k5
+ columns.types string:string:string:string:string:string
+#### A masked pattern was here ####
+ name default.dst_union22_delta
+ numFiles 1
+ numPartitions 1
+ numRows 500
+ partition_columns ds
+ rawDataSize 16936
+ serialization.ddl struct dst_union22_delta { string k0, string k1, string k2, string k3, string k4, string k5}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 17436
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dst_union22_delta
+ name: default.dst_union22_delta
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col3} {VALUE._col4}
+ filter mappings:
+ 0 [1, 1]
+ filter predicates:
+ 0 {(VALUE._col4 = '1')}
+ 1
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col10, _col11
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col10
+ type: string
+ expr: _col11
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types string,string,string,string
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Truncated Path -> Alias:
+ /dst_union22/ds=1 [null-subquery2:subq-subquery2:a]
+ /dst_union22_delta/ds=1 [null-subquery2:subq-subquery2:b:dst_union22_delta]
+
PREHOOK: query: insert overwrite table dst_union22 partition (ds='2')
select * from
(
select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50
union all
-select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
+select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on
a.k1 = b.k1 and a.ds='1'
where a.k1 > 20
@@ -530,7 +731,7 @@
(
select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50
union all
-select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
+select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on
a.k1 = b.k1 and a.ds='1'
where a.k1 > 20
Index: ql/src/test/results/clientpositive/join32.q.out
===================================================================
--- ql/src/test/results/clientpositive/join32.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/join32.q.out (working copy)
@@ -3,41 +3,41 @@
POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@dest_j1
-PREHOOK: query: EXPLAIN EXTENDED
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN EXTENDED
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value)))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src1) x) (TOK_TABREF (TOK_TABNAME src) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF (TOK_TABNAME srcpart) z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value)))))
STAGE DEPENDENCIES:
- Stage-12 is a root stage
- Stage-8 depends on stages: Stage-12
- Stage-11 depends on stages: Stage-8
- Stage-1 depends on stages: Stage-11
- Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
- Stage-4
- Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-7 is a root stage
+ Stage-6 depends on stages: Stage-7
+ Stage-0 depends on stages: Stage-6
Stage-2 depends on stages: Stage-0
- Stage-3
- Stage-5
- Stage-6 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-12
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
x
Fetch Operator
limit: -1
+ z
+ Fetch Operator
+ limit: -1
Alias -> Map Local Operator Tree:
x
TableScan
@@ -52,8 +52,21 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
+ z
+ TableScan
+ alias: z
+ GatherStats: false
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col5} {_col0}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[value]]
+ Position of Big Table: 0
- Stage: Stage-8
+ Stage: Stage-6
Map Reduce
Alias -> Map Operator Tree:
y
@@ -72,24 +85,54 @@
1 [Column[key]]
outputColumnNames: _col0, _col1, _col5
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col5} {_col0}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col1]]
+ 1 [Column[value]]
+ outputColumnNames: _col1, _col4, _col9
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col4
+ type: string
+ expr: _col9
+ type: string
+ expr: _col1
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col5
- columns.types string,string,string
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value,val2
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.dest_j1
+ serialization.ddl struct dest_j1 { string key, string value, string val2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_j1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
- Needs Tagging: false
+ Needs Tagging: true
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -135,202 +178,25 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src
name: default.src
- Truncated Path -> Alias:
- /src [y]
-
- Stage: Stage-11
- Map Reduce Local Work
- Alias -> Map Local Tables:
- z
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- z
- TableScan
- alias: z
- GatherStats: false
- HashTable Sink Operator
- condition expressions:
- 0 {_col5} {_col0}
- 1 {value}
- handleSkewJoin: false
- keys:
- 0 [Column[_col1]]
- 1 [Column[value]]
- Position of Big Table: 0
-
- Stage: Stage-1
- Map Reduce
- Alias -> Map Operator Tree:
#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col5
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {_col5} {_col0}
- 1 {value}
- handleSkewJoin: false
- keys:
- 0 [Column[_col1]]
- 1 [Column[value]]
- outputColumnNames: _col1, _col4, _col9
- Position of Big Table: 0
- Select Operator
- expressions:
- expr: _col1
- type: string
- expr: _col4
- type: string
- expr: _col9
- type: string
- outputColumnNames: _col1, _col4, _col9
- Select Operator
- expressions:
- expr: _col4
- type: string
- expr: _col9
- type: string
- expr: _col1
- type: string
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Local Work:
- Map Reduce Local Work
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
Partition
- base file name: -mr-10003
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col5
- columns.types string,string,string
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col5
- columns.types string,string,string
- escape.delim \
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-7
- Conditional Operator
-
- Stage: Stage-4
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
-#### A masked pattern was here ####
-
- Stage: Stage-2
- Stats-Aggr Operator
-#### A masked pattern was here ####
-
- Stage: Stage-3
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -ext-10002
+ base file name: src1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
- columns key,value,val2
- columns.types string:string:string
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
+ name default.src1
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -338,68 +204,31 @@
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
- columns key,value,val2
- columns.types string:string:string
+ columns key,value
+ columns.types string:string
#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
+ name default.src1
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 216
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- name: default.dest_j1
+ name: default.src1
+ name: default.src1
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /src [y]
- Stage: Stage-5
- Map Reduce
- Alias -> Map Operator Tree:
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
#### A masked pattern was here ####
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_j1
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -ext-10002
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
+ table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
@@ -414,19 +243,15 @@
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
- name: default.dest_j1
- Truncated Path -> Alias:
#### A masked pattern was here ####
- Stage: Stage-6
- Move Operator
- files:
- hdfs directory: true
+ Stage: Stage-2
+ Stats-Aggr Operator
#### A masked pattern was here ####
PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
PREHOOK: type: QUERY
@@ -436,7 +261,7 @@
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Output: default@dest_j1
POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
POSTHOOK: type: QUERY
Index: ql/src/test/results/clientpositive/bucketcontext_6.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketcontext_6.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketcontext_6.q.out (working copy)
@@ -68,13 +68,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
a
@@ -120,21 +119,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -237,48 +235,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -309,7 +265,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Stage: Stage-0
Fetch Operator
@@ -340,7 +297,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -362,21 +318,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -477,48 +432,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -549,7 +462,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/bucketcontext_1.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketcontext_1.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketcontext_1.q.out (working copy)
@@ -81,13 +81,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
a
@@ -133,21 +132,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -250,48 +248,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -322,7 +278,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Stage: Stage-0
Fetch Operator
@@ -355,7 +312,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -377,21 +333,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -492,48 +447,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big/ds=2008-04-08 [b]
- /bucket_big/ds=2008-04-09 [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -564,7 +477,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big/ds=2008-04-08 [b]
+ /bucket_big/ds=2008-04-09 [b]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/bucketmapjoin10.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketmapjoin10.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketmapjoin10.q.out (working copy)
@@ -118,13 +118,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -163,21 +162,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -278,48 +276,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- /srcbucket_mapjoin_part_1/part=2 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -350,7 +306,8 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/smb_mapjoin_15.q.out
===================================================================
--- ql/src/test/results/clientpositive/smb_mapjoin_15.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/smb_mapjoin_15.q.out (working copy)
@@ -51,7 +51,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -74,21 +73,43 @@
1 [Column[key]]
outputColumnNames: _col0, _col1, _col4, _col5
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col4,_col5
- columns.types int,string,int,string
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: int
+ expr: _col3
+ type: string
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -139,70 +160,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table1
name: default.test_table1
- Truncated Path -> Alias:
- /test_table1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col4, _col5
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- tag: -1
- value expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col2
- type: int
- expr: _col3
- type: string
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col4,_col5
- columns.types int,string,int,string
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col4,_col5
- columns.types int,string,int,string
- escape.delim \
Reduce Operator Tree:
Extract
Limit
@@ -224,7 +181,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /test_table1 [a]
Stage: Stage-0
Fetch Operator
@@ -346,7 +303,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -369,21 +325,55 @@
1 [Column[key], Column[key2]]
outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col5,_col6,_col7
- columns.types int,int,string,int,int,string
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ expr: _col5
+ type: int
+ expr: _col6
+ type: int
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ expr: _col5
+ type: int
+ expr: _col6
+ type: int
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ expr: _col3
+ type: int
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -434,82 +424,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table1
name: default.test_table1
- Truncated Path -> Alias:
- /test_table1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: int
- expr: _col2
- type: string
- expr: _col5
- type: int
- expr: _col6
- type: int
- expr: _col7
- type: string
- outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: int
- expr: _col2
- type: string
- expr: _col5
- type: int
- expr: _col6
- type: int
- expr: _col7
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- tag: -1
- value expressions:
- expr: _col0
- type: int
- expr: _col1
- type: int
- expr: _col2
- type: string
- expr: _col3
- type: int
- expr: _col4
- type: int
- expr: _col5
- type: string
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col5,_col6,_col7
- columns.types int,int,string,int,int,string
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col5,_col6,_col7
- columns.types int,int,string,int,int,string
- escape.delim \
Reduce Operator Tree:
Extract
Limit
@@ -531,12 +445,13 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /test_table1 [a]
Stage: Stage-0
Fetch Operator
limit: 10
+
PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1
@@ -592,7 +507,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -615,21 +529,55 @@
1 [Column[key2], Column[key]]
outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col5,_col6,_col7
- columns.types int,int,string,int,int,string
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ expr: _col5
+ type: int
+ expr: _col6
+ type: int
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ expr: _col5
+ type: int
+ expr: _col6
+ type: int
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ expr: _col3
+ type: int
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -680,82 +628,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table1
name: default.test_table1
- Truncated Path -> Alias:
- /test_table1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: int
- expr: _col2
- type: string
- expr: _col5
- type: int
- expr: _col6
- type: int
- expr: _col7
- type: string
- outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: int
- expr: _col2
- type: string
- expr: _col5
- type: int
- expr: _col6
- type: int
- expr: _col7
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- tag: -1
- value expressions:
- expr: _col0
- type: int
- expr: _col1
- type: int
- expr: _col2
- type: string
- expr: _col3
- type: int
- expr: _col4
- type: int
- expr: _col5
- type: string
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col5,_col6,_col7
- columns.types int,int,string,int,int,string
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col5,_col6,_col7
- columns.types int,int,string,int,int,string
- escape.delim \
Reduce Operator Tree:
Extract
Limit
@@ -777,12 +649,13 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /test_table1 [a]
Stage: Stage-0
Fetch Operator
limit: 10
+
PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1
@@ -837,13 +710,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10)))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -890,21 +762,55 @@
1 [Column[key], Column[value]]
outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col5,_col6,_col7
- columns.types int,int,string,int,int,string
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ expr: _col5
+ type: int
+ expr: _col6
+ type: int
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ expr: _col5
+ type: int
+ expr: _col6
+ type: int
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ expr: _col2
+ type: string
+ expr: _col3
+ type: int
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -957,82 +863,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table1
name: default.test_table1
- Truncated Path -> Alias:
- /test_table1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: int
- expr: _col2
- type: string
- expr: _col5
- type: int
- expr: _col6
- type: int
- expr: _col7
- type: string
- outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: int
- expr: _col2
- type: string
- expr: _col5
- type: int
- expr: _col6
- type: int
- expr: _col7
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- tag: -1
- value expressions:
- expr: _col0
- type: int
- expr: _col1
- type: int
- expr: _col2
- type: string
- expr: _col3
- type: int
- expr: _col4
- type: int
- expr: _col5
- type: string
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col5,_col6,_col7
- columns.types int,int,string,int,int,string
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col5,_col6,_col7
- columns.types int,int,string,int,int,string
- escape.delim \
Reduce Operator Tree:
Extract
Limit
@@ -1054,7 +884,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /test_table1 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/mapjoin_distinct.q.out
===================================================================
--- ql/src/test/results/clientpositive/mapjoin_distinct.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/mapjoin_distinct.q.out (working copy)
@@ -14,14 +14,13 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) value)))))
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-1 depends on stages: Stage-5
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
Stage-2 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-2
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-4
Map Reduce Local Work
Alias -> Map Local Tables:
d
@@ -59,45 +58,34 @@
1 [Column[key]]
outputColumnNames: _col1
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col1
- type: string
- outputColumnNames: _col1
- Select Operator
- expressions:
- expr: _col1
- type: string
- outputColumnNames: _col1
- Group By Operator
- bucketGroup: false
- keys:
+ Select Operator
+ expressions:
expr: _col1
type: string
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- key expressions:
- expr: _col0
+ outputColumnNames: _col1
+ Select Operator
+ expressions:
+ expr: _col1
type: string
- sort order: +
- Map-reduce partition columns:
- expr: rand()
- type: double
- tag: -1
+ outputColumnNames: _col1
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: _col1
+ type: string
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: rand()
+ type: double
+ tag: -1
+ Local Work:
+ Map Reduce Local Work
Reduce Operator Tree:
Group By Operator
bucketGroup: false
@@ -113,7 +101,7 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Stage: Stage-3
+ Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
@@ -195,13 +183,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) value)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
d
@@ -239,45 +226,34 @@
1 [Column[key]]
outputColumnNames: _col1
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col1
- type: string
- outputColumnNames: _col1
- Select Operator
- expressions:
- expr: _col1
- type: string
- outputColumnNames: _col1
- Group By Operator
- bucketGroup: false
- keys:
+ Select Operator
+ expressions:
expr: _col1
type: string
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- key expressions:
- expr: _col0
+ outputColumnNames: _col1
+ Select Operator
+ expressions:
+ expr: _col1
type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
+ outputColumnNames: _col1
+ Group By Operator
+ bucketGroup: false
+ keys:
+ expr: _col1
+ type: string
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ Local Work:
+ Map Reduce Local Work
Reduce Operator Tree:
Group By Operator
bucketGroup: false
@@ -347,14 +323,13 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) value)))))
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-1 depends on stages: Stage-5
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
Stage-2 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-2
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-4
Map Reduce Local Work
Alias -> Map Local Tables:
d
@@ -392,38 +367,27 @@
1 [Column[key]]
outputColumnNames: _col1
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ outputColumnNames: _col1
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ outputColumnNames: _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: rand()
+ type: double
+ tag: -1
Local Work:
Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col1
- type: string
- outputColumnNames: _col1
- Select Operator
- expressions:
- expr: _col1
- type: string
- outputColumnNames: _col1
- Reduce Output Operator
- key expressions:
- expr: _col1
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: rand()
- type: double
- tag: -1
Reduce Operator Tree:
Group By Operator
bucketGroup: false
@@ -439,7 +403,7 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Stage: Stage-3
+ Stage: Stage-2
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
@@ -521,13 +485,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcpart) c) (TOK_TABREF (TOK_TABNAME srcpart) d) (AND (AND (= (. (TOK_TABLE_OR_COL c) key) (. (TOK_TABLE_OR_COL d) key)) (= (. (TOK_TABLE_OR_COL c) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL d) ds) '2008-04-08')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST d))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL c) value)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
d
@@ -565,38 +528,27 @@
1 [Column[key]]
outputColumnNames: _col1
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ outputColumnNames: _col1
+ Select Operator
+ expressions:
+ expr: _col1
+ type: string
+ outputColumnNames: _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col1
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col1
+ type: string
+ tag: -1
Local Work:
Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col1
- type: string
- outputColumnNames: _col1
- Select Operator
- expressions:
- expr: _col1
- type: string
- outputColumnNames: _col1
- Reduce Output Operator
- key expressions:
- expr: _col1
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col1
- type: string
- tag: -1
Reduce Operator Tree:
Group By Operator
bucketGroup: false
Index: ql/src/test/results/clientpositive/semijoin.q.out
===================================================================
--- ql/src/test/results/clientpositive/semijoin.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/semijoin.q.out (working copy)
@@ -1137,13 +1137,12 @@
(TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -1193,38 +1192,27 @@
1 [Column[_col0]]
outputColumnNames: _col0
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
Local Work:
Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- tag: -1
- value expressions:
- expr: _col0
- type: int
Reduce Operator Tree:
Extract
File Output Operator
@@ -1712,13 +1700,12 @@
(TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_LEFTSEMIJOIN (TOK_TABREF (TOK_TABNAME t3) a) (TOK_TABREF (TOK_TABNAME t1) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME t2) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b c))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-1 depends on stages: Stage-5
- Stage-2 depends on stages: Stage-1
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-4
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -1802,38 +1789,27 @@
2 [Column[_col0]]
outputColumnNames: _col0
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
Local Work:
Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: int
- outputColumnNames: _col0
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- tag: -1
- value expressions:
- expr: _col0
- type: int
Reduce Operator Tree:
Extract
File Output Operator
Index: ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out
===================================================================
--- ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out (working copy)
@@ -70,13 +70,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -122,21 +121,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -191,47 +189,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_1
name: default.srcbucket_mapjoin_part_1
- Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -262,7 +219,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /srcbucket_mapjoin_part_1/part=1 [a]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out
===================================================================
--- ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out (working copy)
@@ -59,7 +59,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -85,35 +84,24 @@
1 [Column[key]]
outputColumnNames: _col0
Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
+ Select Operator
+ expressions:
expr: _col0
- type: bigint
+ type: string
+ outputColumnNames: _col0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
Index: ql/src/test/results/clientpositive/join31.q.out
===================================================================
--- ql/src/test/results/clientpositive/join31.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/join31.q.out (working copy)
@@ -3,30 +3,38 @@
POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt INT)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@dest_j1
-PREHOOK: query: EXPLAIN
+PREHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt
+SELECT subq1.key, count(1) as cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
group by subq1.key
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: -- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt
+SELECT subq1.key, count(1) as cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
group by subq1.key
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST subq1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL subq1) key))))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) y)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL y) key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_j1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL subq1) key))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-7 depends on stages: Stage-1, Stage-5
- Stage-2 depends on stages: Stage-7
- Stage-3 depends on stages: Stage-2
+ Stage-8 depends on stages: Stage-1, Stage-5 , consists of Stage-9, Stage-10, Stage-2
+ Stage-9 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-9
+ Stage-3 depends on stages: Stage-2, Stage-6, Stage-7
Stage-0 depends on stages: Stage-3
Stage-4 depends on stages: Stage-0
+ Stage-10 has a backup stage: Stage-2
+ Stage-7 depends on stages: Stage-10
+ Stage-2
Stage-5 is a root stage
STAGE PLANS:
@@ -84,14 +92,17 @@
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- Stage: Stage-7
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-9
Map Reduce Local Work
Alias -> Map Local Tables:
-#### A masked pattern was here ####
+ $INTNAME
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
-#### A masked pattern was here ####
+ $INTNAME
HashTable Sink Operator
condition expressions:
0 {_col0}
@@ -100,12 +111,12 @@
keys:
0 [Column[_col0]]
1 [Column[_col0]]
- Position of Big Table: 1
+ Position of Big Table: 0
- Stage: Stage-2
+ Stage: Stage-6
Map Reduce
Alias -> Map Operator Tree:
-#### A masked pattern was here ####
+ $INTNAME1
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -117,13 +128,27 @@
0 [Column[_col0]]
1 [Column[_col0]]
outputColumnNames: _col0
- Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Local Work:
Map Reduce Local Work
@@ -131,37 +156,18 @@
Map Reduce
Alias -> Map Operator Tree:
#### A masked pattern was here ####
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Select Operator
- expressions:
+ Reduce Output Operator
+ key expressions:
expr: _col0
type: string
- outputColumnNames: _col0
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: false
- keys:
- expr: _col0
- type: string
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -208,6 +214,119 @@
Stage: Stage-4
Stats-Aggr Operator
+ Stage: Stage-10
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 1
+
+ Stage: Stage-7
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ outputColumnNames: _col0
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ $INTNAME
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 1
+ $INTNAME1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: 0
+ value expressions:
+ expr: _col0
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1
+ handleSkewJoin: false
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
Stage: Stage-5
Map Reduce
Alias -> Map Operator Tree:
@@ -264,7 +383,7 @@
PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt
+SELECT subq1.key, count(1) as cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
group by subq1.key
@@ -273,7 +392,7 @@
PREHOOK: Input: default@src1
PREHOOK: Output: default@dest_j1
POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt
+SELECT subq1.key, count(1) as cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
group by subq1.key
Index: ql/src/test/results/clientpositive/bucketcontext_5.q.out
===================================================================
--- ql/src/test/results/clientpositive/bucketcontext_5.q.out (revision 1438313)
+++ ql/src/test/results/clientpositive/bucketcontext_5.q.out (working copy)
@@ -54,13 +54,12 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bucket_small) a) (TOK_TABREF (TOK_TABNAME bucket_big) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-1 depends on stages: Stage-4
- Stage-2 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-4
+ Stage: Stage-3
Map Reduce Local Work
Alias -> Map Local Tables:
a
@@ -106,21 +105,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -173,47 +171,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -244,7 +201,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big [b]
Stage: Stage-0
Fetch Operator
@@ -271,7 +228,6 @@
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -293,21 +249,20 @@
0 [Column[key]]
1 [Column[key]]
Position of Big Table: 1
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -358,47 +313,6 @@
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucket_big
name: default.bucket_big
- Truncated Path -> Alias:
- /bucket_big [b]
-
- Stage: Stage-2
- Map Reduce
- Alias -> Map Operator Tree:
-#### A masked pattern was here ####
- Select Operator
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
- Needs Tagging: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: -mr-10002
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
-
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns
- columns.types
- escape.delim \
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -429,7 +343,7 @@
GatherStats: false
MultiFileSpray: false
Truncated Path -> Alias:
-#### A masked pattern was here ####
+ /bucket_big [b]
Stage: Stage-0
Fetch Operator
Index: ql/src/test/results/clientnegative/join29.q.out
===================================================================
--- ql/src/test/results/clientnegative/join29.q.out (revision 0)
+++ ql/src/test/results/clientnegative/join29.q.out (working copy)
@@ -0,0 +1,6 @@
+PREHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_j1
+FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint.
Index: ql/src/test/results/clientnegative/join35.q.out
===================================================================
--- ql/src/test/results/clientnegative/join35.q.out (revision 0)
+++ ql/src/test/results/clientnegative/join35.q.out (working copy)
@@ -0,0 +1,6 @@
+PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_j1
+FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint.
Index: ql/src/test/results/clientnegative/join28.q.out
===================================================================
--- ql/src/test/results/clientnegative/join28.q.out (revision 0)
+++ ql/src/test/results/clientnegative/join28.q.out (working copy)
@@ -0,0 +1,6 @@
+PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_j1
+FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint.
Index: ql/src/test/results/clientnegative/union22.q.out
===================================================================
--- ql/src/test/results/clientnegative/union22.q.out (revision 0)
+++ ql/src/test/results/clientnegative/union22.q.out (working copy)
@@ -0,0 +1,45 @@
+PREHOOK: query: create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dst_union22
+PREHOOK: query: create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dst_union22_delta
+PREHOOK: query: insert overwrite table dst_union22 partition (ds='1')
+select key, value, key , value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dst_union22@ds=1
+POSTHOOK: query: insert overwrite table dst_union22 partition (ds='1')
+select key, value, key , value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dst_union22@ds=1
+POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table dst_union22_delta partition (ds='1')
+select key, key, value, key, value, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dst_union22_delta@ds=1
+POSTHOOK: query: insert overwrite table dst_union22_delta partition (ds='1')
+select key, key, value, key, value, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dst_union22_delta@ds=1
+POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dst_union22_delta PARTITION(ds=1).k5 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint.
Index: ql/src/test/results/clientnegative/join32.q.out
===================================================================
--- ql/src/test/results/clientnegative/join32.q.out (revision 0)
+++ ql/src/test/results/clientnegative/join32.q.out (working copy)
@@ -0,0 +1,6 @@
+PREHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_j1
+FAILED: SemanticException [Error 10227]: All operators are not allowed with mapjoin hint. Remove the mapjoin hint.
Index: ql/src/test/queries/clientpositive/join32.q
===================================================================
--- ql/src/test/queries/clientpositive/join32.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/join32.q (working copy)
@@ -1,13 +1,19 @@
CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
+
+-- Since the inputs are small, it should be automatically converted to mapjoin
+
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
Index: ql/src/test/queries/clientpositive/mapjoin_subquery2.q
===================================================================
--- ql/src/test/queries/clientpositive/mapjoin_subquery2.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/mapjoin_subquery2.q (working copy)
@@ -15,25 +15,25 @@
load data local inpath '../data/files/y.txt' INTO TABLE y;
load data local inpath '../data/files/z.txt' INTO TABLE z;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
+
+-- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
FROM
(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2
FROM y JOIN x ON (x.id = y.id)) subq
JOIN z ON (subq.key1 = z.id);
-EXPLAIN
-SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
+SELECT subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
FROM
-(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
+(SELECT x.id as key1, x.name as value1, y.id as key2, y.name as value2
FROM y JOIN x ON (x.id = y.id)) subq
JOIN z ON (subq.key1 = z.id);
-SELECT /*+ MAPJOIN(z) */ subq.key1, subq.value1, subq.key2, subq.value2, z.id, z.name
-FROM
-(SELECT /*+ MAPJOIN(x) */ x.id as key1, x.name as value1, y.id as key2, y.name as value2
- FROM y JOIN x ON (x.id = y.id)) subq
- JOIN z ON (subq.key1 = z.id);
-
drop table x;
drop table y;
drop table z;
Index: ql/src/test/queries/clientpositive/join29.q
===================================================================
--- ql/src/test/queries/clientpositive/join29.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/join29.q (working copy)
@@ -1,13 +1,19 @@
CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT);
-EXPLAIN
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
+
+-- Since the inputs are small, it should be automatically converted to mapjoin
+
+EXPLAIN
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt
+SELECT subq1.key, subq1.cnt, subq2.cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key);
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt
+SELECT subq1.key, subq1.cnt, subq2.cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key);
Index: ql/src/test/queries/clientpositive/mapjoin_subquery.q
===================================================================
--- ql/src/test/queries/clientpositive/mapjoin_subquery.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/mapjoin_subquery.q (working copy)
@@ -1,28 +1,34 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
+
+-- Since the inputs are small, it should be automatically converted to mapjoin
+
EXPLAIN
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11);
-
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
- JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11);
+ JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
+ORDER BY subq.key1, z.value;
EXPLAIN
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
- order by subq.key1;
+ order by subq.key1, z.value;
-
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
- order by subq.key1;
+ order by subq.key1, z.value;
Index: ql/src/test/queries/clientpositive/union22.q
===================================================================
--- ql/src/test/queries/clientpositive/union22.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/union22.q (working copy)
@@ -1,7 +1,4 @@
-
create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string);
-
-
create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string);
insert overwrite table dst_union22 partition (ds='1')
@@ -12,13 +9,19 @@
set hive.merge.mapfiles=false;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
+
+-- Since the inputs are small, it should be automatically converted to mapjoin
+
explain extended
insert overwrite table dst_union22 partition (ds='2')
select * from
(
select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50
union all
-select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
+select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on
a.k1 = b.k1 and a.ds='1'
where a.k1 > 20
@@ -30,12 +33,11 @@
(
select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50
union all
-select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
+select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on
a.k1 = b.k1 and a.ds='1'
where a.k1 > 20
)
subq;
-
select * from dst_union22 where ds = '2' order by k1, k2, k3, k4;
Index: ql/src/test/queries/clientpositive/join33.q
===================================================================
--- ql/src/test/queries/clientpositive/join33.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/join33.q (working copy)
@@ -1,13 +1,19 @@
CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
+
+-- Since the inputs are small, it should be automatically converted to mapjoin
+
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value
+SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
Index: ql/src/test/queries/clientpositive/smb_mapjoin_14.q
===================================================================
--- ql/src/test/queries/clientpositive/smb_mapjoin_14.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/smb_mapjoin_14.q (working copy)
@@ -62,43 +62,6 @@
group by key
) subq2;
--- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them.
--- Each sub-query should be converted to a sort-merge join.
-explain
-select src1.key, src1.cnt1, src2.cnt1 from
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq1 group by key
-) src1
-join
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq2 group by key
-) src2
-on src1.key = src2.key
-order by src1.key, src1.cnt1, src2.cnt1;
-
-select src1.key, src1.cnt1, src2.cnt1 from
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq1 group by key
-) src1
-join
-(
- select key, count(*) as cnt1 from
- (
- select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
- ) subq2 group by key
-) src2
-on src1.key = src2.key
-order by src1.key, src1.cnt1, src2.cnt1;
-
-- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should
-- be converted to a sort-merge join.
explain
Index: ql/src/test/queries/clientpositive/join34.q
===================================================================
--- ql/src/test/queries/clientpositive/join34.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/join34.q (working copy)
@@ -1,10 +1,14 @@
+CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
-CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
+-- Since the inputs are small, it should be automatically converted to mapjoin
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value
+SELECT x.key, x.value, subq1.value
FROM
( SELECT x.key as key, x.value as value from src x where x.key < 20
UNION ALL
@@ -13,7 +17,7 @@
JOIN src1 x ON (x.key = subq1.key);
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value
+SELECT x.key, x.value, subq1.value
FROM
( SELECT x.key as key, x.value as value from src x where x.key < 20
UNION ALL
Index: ql/src/test/queries/clientpositive/multiMapJoin1.q
===================================================================
--- ql/src/test/queries/clientpositive/multiMapJoin1.q (revision 0)
+++ ql/src/test/queries/clientpositive/multiMapJoin1.q (working copy)
@@ -0,0 +1,70 @@
+-- Join of a big table with 2 small tables on different keys should be performed as a single MR job
+create table smallTbl1(key string, value string);
+insert overwrite table smallTbl1 select * from src where key < 10;
+
+create table smallTbl2(key string, value string);
+insert overwrite table smallTbl2 select * from src where key < 10;
+
+create table bigTbl(key string, value string);
+insert overwrite table bigTbl
+select * from
+(
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+ union all
+ select * from src
+) subq;
+
+set hive.auto.convert.join=true;
+
+explain
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value);
+
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value);
+
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
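+
+-- With aggressive mapjoin enabled and the small tables under the size threshold,
+-- the joins should be converted directly to mapjoins (no conditional tasks)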
+
+explain
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value);
+
+select count(*) FROM
+(select bigTbl.key as key, bigTbl.value as value1,
+ bigTbl.value as value2 FROM bigTbl JOIN smallTbl1
+ on (bigTbl.key = smallTbl1.key)
+) firstjoin
+JOIN
+smallTbl2 on (firstjoin.value1 = smallTbl2.value);
Index: ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
===================================================================
--- ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (working copy)
@@ -1,5 +1,11 @@
-explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key);
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
-explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds;
+-- Since the inputs are small, it should be automatically converted to mapjoin
-select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds;
+explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key);
+
+explain select count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds;
+
+select count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds;
Index: ql/src/test/queries/clientpositive/join31.q
===================================================================
--- ql/src/test/queries/clientpositive/join31.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/join31.q (working copy)
@@ -1,14 +1,20 @@
CREATE TABLE dest_j1(key STRING, cnt INT);
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
+
+-- Since the inputs are small, it should be automatically converted to mapjoin
+
EXPLAIN
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt
+SELECT subq1.key, count(1) as cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
group by subq1.key;
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(subq1) */ subq1.key, count(1) as cnt
+SELECT subq1.key, count(1) as cnt
FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
(select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key)
group by subq1.key;
Index: ql/src/test/queries/clientpositive/join35.q
===================================================================
--- ql/src/test/queries/clientpositive/join35.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/join35.q (working copy)
@@ -1,10 +1,14 @@
+CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
-CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE;
+-- Since the inputs are small, it should be automatically converted to mapjoin
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt
+SELECT x.key, x.value, subq1.cnt
FROM
( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key
UNION ALL
@@ -13,7 +17,7 @@
JOIN src1 x ON (x.key = subq1.key);
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt
+SELECT x.key, x.value, subq1.cnt
FROM
( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key
UNION ALL
Index: ql/src/test/queries/clientpositive/join28.q
===================================================================
--- ql/src/test/queries/clientpositive/join28.q (revision 1438313)
+++ ql/src/test/queries/clientpositive/join28.q (working copy)
@@ -1,19 +1,23 @@
+CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.aggressivemapjoin=true;
+set hive.auto.convert.join.aggressivemapjoin.size=10000;
-CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE;
+-- Since the inputs are small, it should be automatically converted to mapjoin
EXPLAIN
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11);
INSERT OVERWRITE TABLE dest_j1
-SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+SELECT subq.key1, z.value
FROM
-(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+(SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11);
Index: ql/src/test/queries/clientpositive/smb_mapjoin_16.q
===================================================================
--- ql/src/test/queries/clientpositive/smb_mapjoin_16.q (revision 0)
+++ ql/src/test/queries/clientpositive/smb_mapjoin_16.q (working copy)
@@ -0,0 +1,21 @@
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- Create bucketed and sorted tables
+CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+FROM src
+INSERT OVERWRITE TABLE test_table1 SELECT *
+INSERT OVERWRITE TABLE test_table2 SELECT *;
+
+-- Mapjoin followed by an aggregation should be performed in a single MR job
+EXPLAIN
+SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key;
+SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key;
Index: ql/src/test/queries/clientnegative/union22.q
===================================================================
--- ql/src/test/queries/clientnegative/union22.q (revision 0)
+++ ql/src/test/queries/clientnegative/union22.q (working copy)
@@ -0,0 +1,26 @@
+create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string);
+create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string);
+
+insert overwrite table dst_union22 partition (ds='1')
+select key, value, key , value from src;
+
+insert overwrite table dst_union22_delta partition (ds='1')
+select key, key, value, key, value, value from src;
+
+set hive.merge.mapfiles=false;
+
+-- Union followed by Mapjoin is not supported.
+-- The same query would work without the hint
+
+explain extended
+insert overwrite table dst_union22 partition (ds='2')
+select * from
+(
+select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50
+union all
+select /*+ MAPJOIN(b) */ a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
+from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on
+a.k1 = b.k1 and a.ds='1'
+where a.k1 > 20
+)
+subq;
Index: ql/src/test/queries/clientnegative/join32.q
===================================================================
--- ql/src/test/queries/clientnegative/join32.q (revision 0)
+++ ql/src/test/queries/clientnegative/join32.q (working copy)
@@ -0,0 +1,13 @@
+CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
+
+-- Mapjoin followed by Mapjoin is not supported.
+-- The same query would work without the hint
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE dest_j1
+SELECT /*+ MAPJOIN(x,z) */ x.key, z.value, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
+
+
+
+
Index: ql/src/test/queries/clientnegative/join35.q
===================================================================
--- ql/src/test/queries/clientnegative/join35.q (revision 0)
+++ ql/src/test/queries/clientnegative/join35.q (working copy)
@@ -0,0 +1,17 @@
+CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE;
+
+-- Mapjoin followed by union is not supported.
+-- The same query would work without the hint
+EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE dest_j1
+SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.cnt
+FROM
+( SELECT x.key as key, count(1) as cnt from src x where x.key < 20 group by x.key
+ UNION ALL
+ SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key
+) subq1
+JOIN src1 x ON (x.key = subq1.key);
+
+
+
+
Index: ql/src/test/queries/clientnegative/join28.q
===================================================================
--- ql/src/test/queries/clientnegative/join28.q (revision 0)
+++ ql/src/test/queries/clientnegative/join28.q (working copy)
@@ -0,0 +1,14 @@
+CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE;
+
+-- Mapjoin followed by mapjoin is not supported.
+-- The same query would work fine without the hint.
+EXPLAIN
+INSERT OVERWRITE TABLE dest_j1
+SELECT /*+ MAPJOIN(z) */ subq.key1, z.value
+FROM
+(SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2
+ FROM src1 x JOIN src y ON (x.key = y.key)) subq
+ JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11);
+
+
+
Index: ql/src/test/queries/clientnegative/join29.q
===================================================================
--- ql/src/test/queries/clientnegative/join29.q (revision 0)
+++ ql/src/test/queries/clientnegative/join29.q (working copy)
@@ -0,0 +1,9 @@
+CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT);
+
+-- Mapjoin followed by group by is not supported.
+-- The same query would work without the hint
+EXPLAIN
+INSERT OVERWRITE TABLE dest_j1
+SELECT /*+ MAPJOIN(subq1) */ subq1.key, subq1.cnt, subq2.cnt
+FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN
+ (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key);
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink4.java (working copy)
@@ -1,98 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.optimizer;
-
-import java.io.Serializable;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Stack;
-
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.MapredWork;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-
-/**
- * Processor for the rule - map join followed by reduce sink.
- */
-public class GenMRRedSink4 implements NodeProcessor {
-
- public GenMRRedSink4() {
- }
-
- /**
- * Reduce Scan encountered.
- *
- * @param nd
- * the reduce sink operator encountered
- * @param opProcCtx
- * context
- */
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx,
- Object... nodeOutputs) throws SemanticException {
- ReduceSinkOperator op = (ReduceSinkOperator) nd;
- GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
-
- ctx.getParseCtx();
-
- // map-join consisted on a bunch of map-only jobs, and it has been split
- // after the mapjoin
- Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
- Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx
- .getMapCurrCtx();
- GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
- Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
- MapredWork plan = (MapredWork) currTask.getWork();
- HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = ctx
- .getOpTaskMap();
- Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);
-
- ctx.setCurrTask(currTask);
-
- // If the plan for this reducer does not exist, initialize the plan
- if (opMapTask == null) {
- // When the reducer is encountered for the first time
- if (plan.getReducer() == null) {
- GenMapRedUtils.initMapJoinPlan(op, ctx, true, null, true, -1);
- // When mapjoin is followed by a multi-table insert
- } else {
- GenMapRedUtils.splitPlan(op, ctx);
- }
- } else {
- // There is a join after mapjoin. One of the branches of mapjoin has already
- // been initialized.
- // Initialize the current branch, and join with the original plan.
- assert plan.getReducer() != reducer;
- GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, false, true, null);
- }
-
- mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(),
- ctx.getCurrAliasId()));
-
- // the mapjoin operator has been processed
- ctx.setCurrMapJoinOp(null);
- return null;
- }
-}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcContext.java (working copy)
@@ -39,16 +39,13 @@
private final transient boolean[] mapOnlySubq;
private final transient boolean[] mapOnlySubqSet;
private final transient boolean[] rootTask;
- private final transient boolean[] mapJoinSubq;
private transient int numInputs;
- private transient boolean mapJoinQuery;
public UnionParseContext(int numInputs) {
this.numInputs = numInputs;
mapOnlySubq = new boolean[numInputs];
rootTask = new boolean[numInputs];
- mapJoinSubq = new boolean[numInputs];
mapOnlySubqSet = new boolean[numInputs];
}
@@ -61,21 +58,6 @@
this.mapOnlySubqSet[pos] = true;
}
- public boolean getMapJoinSubq(int pos) {
- return mapJoinSubq[pos];
- }
-
- public void setMapJoinSubq(int pos, boolean mapJoinSubq) {
- this.mapJoinSubq[pos] = mapJoinSubq;
- if (mapJoinSubq) {
- mapJoinQuery = true;
- }
- }
-
- public boolean getMapJoinQuery() {
- return mapJoinQuery;
- }
-
public boolean getRootTask(int pos) {
return rootTask[pos];
}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (working copy)
@@ -107,30 +107,6 @@
}
/**
- * Map-join subquery followed by Union.
- */
- public static class MapJoinUnion implements NodeProcessor {
-
- @Override
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
- UnionOperator union = (UnionOperator) nd;
- UnionProcContext ctx = (UnionProcContext) procCtx;
-
- // find the branch on which this processor was invoked
- int pos = getPositionParent(union, stack);
- UnionParseContext uCtx = ctx.getUnionParseContext(union);
- if (uCtx == null) {
- uCtx = new UnionParseContext(union.getConf().getNumInputs());
- }
-
- uCtx.setMapJoinSubq(pos, true);
- ctx.setUnionParseContext(union, uCtx);
- return null;
- }
- }
-
- /**
* Union subquery followed by Union.
*/
public static class UnknownUnion implements NodeProcessor {
@@ -330,10 +306,6 @@
return new MapUnion();
}
- public static NodeProcessor getMapJoinUnion() {
- return new MapJoinUnion();
- }
-
public static NodeProcessor getUnknownUnion() {
return new UnknownUnion();
}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (working copy)
@@ -25,7 +25,6 @@
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
@@ -79,9 +78,6 @@
opRules.put(new RuleRegExp("R3",
TableScanOperator.getOperatorName() + "%.*" + UnionOperator.getOperatorName() + "%"),
UnionProcFactory.getMapUnion());
- opRules.put(new RuleRegExp("R4",
- MapJoinOperator.getOperatorName() + "%.*" + UnionOperator.getOperatorName() + "%"),
- UnionProcFactory.getMapJoinUnion());
// The dispatcher fires the processor for the matching rule and passes the
// context along
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy)
@@ -227,7 +227,7 @@
QBJoinTree newJoinTree = newWork.getJoinTree();
// generate the map join operator; already checked the map join
MapJoinOperator newMapJoinOp = MapJoinProcessor.convertMapJoin(opParseCtxMap, op,
- newJoinTree, mapJoinPos, true);
+ newJoinTree, mapJoinPos, true, false);
// generate the local work and return the big table alias
String bigTableAlias = MapJoinProcessor
.genMapJoinLocalWork(newWork, newMapJoinOp, mapJoinPos);
@@ -241,9 +241,43 @@
e.printStackTrace();
throw new SemanticException("Generate New MapJoin Opertor Exeception " + e.getMessage());
}
+ }
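+ // Walk up the operator tree and fail if any ancestor is not allowed before a map-join.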
+ private static void checkParentOperatorType(Operator<? extends OperatorDesc> op)
+ throws SemanticException {
+ if (!op.opAllowedBeforeMapJoin()) {
+ throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN.getMsg());
+ }
+ if (op.getParentOperators() != null) {
+ for (Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) {
+ checkParentOperatorType(parentOp);
+ }
+ }
}
+ private static void checkChildOperatorType(Operator extends OperatorDesc> op)
+ throws SemanticException {
+ if (!op.opAllowedAfterMapJoin()) {
+ throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN.getMsg());
+ }
+ if (op.getChildOperators() != null) {
+ for (Operator extends OperatorDesc> childOp : op.getChildOperators()) {
+ checkChildOperatorType(childOp);
+ }
+ }
+ }
+
+ private static void validateMapJoinTypes(Operator extends OperatorDesc> op)
+ throws SemanticException {
+ for (Operator extends OperatorDesc> parentOp : op.getParentOperators()) {
+ checkParentOperatorType(parentOp);
+ }
+
+ for (Operator extends OperatorDesc> childOp : op.getChildOperators()) {
+ checkChildOperatorType(childOp);
+ }
+ }
+
/**
* convert a regular join to a a map-side join.
*
@@ -259,8 +293,10 @@
*/
public static MapJoinOperator convertMapJoin(
LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
- JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin)
+ JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin,
+ boolean validateMapJoinTree)
throws SemanticException {
+
// outer join cannot be performed on a table which is being cached
JoinDesc desc = op.getConf();
JoinCondDesc[] condns = desc.getConds();
@@ -477,6 +513,11 @@
op.setChildOperators(null);
op.setParentOperators(null);
+ // make sure only map-joins can be performed.
+ if (validateMapJoinTree) {
+ validateMapJoinTypes(mapJoinOp);
+ }
+
return mapJoinOp;
}
@@ -487,11 +528,10 @@
HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN)
&& HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN);
-
LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap = pctx
.getOpParseCtx();
MapJoinOperator mapJoinOp = convertMapJoin(opParseCtxMap, op, joinTree, mapJoinPos,
- noCheckOuterJoin);
+ noCheckOuterJoin, true);
// create a dummy select to select all columns
genSelectPlan(pctx, mapJoinOp);
return mapJoinOp;
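The validateMapJoinTree flag added to convertMapJoin above drives a simple recursive check: every ancestor of the generated map-join operator must report opAllowedBeforeMapJoin and every descendant opAllowedAfterMapJoin, otherwise the conversion fails with OPERATOR_NOT_ALLOWED_WITH_MAPJOIN. A minimal, self-contained sketch of the same recursive pattern follows; the Node type and its allowedAboveMapJoin/allowedBelowMapJoin flags are illustrative stand-ins, not Hive classes.

import java.util.ArrayList;
import java.util.List;

public class MapJoinTreeCheck {
    // Toy operator node used only to illustrate the recursive validation pattern.
    static class Node {
        final String name;
        final boolean allowedAboveMapJoin;   // may appear among the map-join's ancestors
        final boolean allowedBelowMapJoin;   // may appear among the map-join's descendants
        final List<Node> parents = new ArrayList<>();
        final List<Node> children = new ArrayList<>();
        Node(String name, boolean above, boolean below) {
            this.name = name;
            this.allowedAboveMapJoin = above;
            this.allowedBelowMapJoin = below;
        }
    }

    // Walk up: every ancestor must be allowed before a map-join.
    static void checkParents(Node n) {
        if (!n.allowedAboveMapJoin) {
            throw new IllegalStateException(n.name + " not allowed above a map-join");
        }
        for (Node p : n.parents) {
            checkParents(p);
        }
    }

    // Walk down: every descendant must be allowed after a map-join.
    static void checkChildren(Node n) {
        if (!n.allowedBelowMapJoin) {
            throw new IllegalStateException(n.name + " not allowed below a map-join");
        }
        for (Node c : n.children) {
            checkChildren(c);
        }
    }

    // Counterpart of validateMapJoinTypes: validate both directions from the map-join itself.
    static void validate(Node mapJoin) {
        for (Node p : mapJoin.parents) {
            checkParents(p);
        }
        for (Node c : mapJoin.children) {
            checkChildren(c);
        }
    }

    public static void main(String[] args) {
        Node scan = new Node("TS", true, true);
        Node mapJoin = new Node("MAPJOIN", true, true);
        Node union = new Node("UNION", true, false);  // pretend unions may not follow a map-join
        scan.children.add(mapJoin);
        mapJoin.parents.add(scan);
        mapJoin.children.add(union);
        union.parents.add(mapJoin);
        validate(mapJoin);  // throws, because UNION sits below the map-join
    }
}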
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (working copy)
@@ -27,7 +27,6 @@
import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -40,7 +39,6 @@
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -155,90 +153,10 @@
}
}
- /**
- * GenMRMapJoinCtx.
- *
- */
- public static class GenMRMapJoinCtx {
- String taskTmpDir;
- TableDesc tt_desc;
- Operator<? extends OperatorDesc> rootMapJoinOp;
- AbstractMapJoinOperator<? extends MapJoinDesc> oldMapJoin;
-
- public GenMRMapJoinCtx() {
- taskTmpDir = null;
- tt_desc = null;
- rootMapJoinOp = null;
- oldMapJoin = null;
- }
-
- /**
- * @param taskTmpDir
- * @param tt_desc
- * @param rootMapJoinOp
- * @param oldMapJoin
- */
- public GenMRMapJoinCtx(String taskTmpDir, TableDesc tt_desc,
- Operator<? extends OperatorDesc> rootMapJoinOp,
- AbstractMapJoinOperator<? extends MapJoinDesc> oldMapJoin) {
- this.taskTmpDir = taskTmpDir;
- this.tt_desc = tt_desc;
- this.rootMapJoinOp = rootMapJoinOp;
- this.oldMapJoin = oldMapJoin;
- }
-
- public void setTaskTmpDir(String taskTmpDir) {
- this.taskTmpDir = taskTmpDir;
- }
-
- public String getTaskTmpDir() {
- return taskTmpDir;
- }
-
- public void setTTDesc(TableDesc tt_desc) {
- this.tt_desc = tt_desc;
- }
-
- public TableDesc getTTDesc() {
- return tt_desc;
- }
-
- /**
- * @return the childSelect
- */
- public Operator<? extends OperatorDesc> getRootMapJoinOp() {
- return rootMapJoinOp;
- }
-
- /**
- * @param rootMapJoinOp
- * the rootMapJoinOp to set
- */
- public void setRootMapJoinOp(Operator<? extends OperatorDesc> rootMapJoinOp) {
- this.rootMapJoinOp = rootMapJoinOp;
- }
-
- /**
- * @return the oldMapJoin
- */
- public AbstractMapJoinOperator<? extends MapJoinDesc> getOldMapJoin() {
- return oldMapJoin;
- }
-
- /**
- * @param oldMapJoin
- * the oldMapJoin to set
- */
- public void setOldMapJoin(AbstractMapJoinOperator<? extends MapJoinDesc> oldMapJoin) {
- this.oldMapJoin = oldMapJoin;
- }
- }
-
private HiveConf conf;
private
HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap;
private HashMap<UnionOperator, GenMRUnionCtx> unionTaskMap;
- private HashMap<AbstractMapJoinOperator<? extends MapJoinDesc>, GenMRMapJoinCtx> mapJoinTaskMap;
private List<Operator<? extends OperatorDesc>> seenOps;
private List<FileSinkOperator> seenFileSinkOps;
@@ -250,7 +168,6 @@
private Task<? extends Serializable> currTask;
private Operator<? extends OperatorDesc> currTopOp;
private UnionOperator currUnionOp;
- private AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp;
private String currAliasId;
private List<Operator<? extends OperatorDesc>> rootOps;
private DependencyCollectionTask dependencyTaskForMultiInsert;
@@ -313,12 +230,10 @@
currTask = null;
currTopOp = null;
currUnionOp = null;
- currMapJoinOp = null;
currAliasId = null;
rootOps = new ArrayList<Operator<? extends OperatorDesc>>();
rootOps.addAll(parseCtx.getTopOps().values());
unionTaskMap = new HashMap<UnionOperator, GenMRUnionCtx>();
- mapJoinTaskMap = new HashMap<AbstractMapJoinOperator<? extends MapJoinDesc>, GenMRMapJoinCtx>();
dependencyTaskForMultiInsert = null;
linkedFileDescTasks = null;
}
@@ -488,19 +403,7 @@
this.currUnionOp = currUnionOp;
}
- public AbstractMapJoinOperator<? extends MapJoinDesc> getCurrMapJoinOp() {
- return currMapJoinOp;
- }
-
/**
- * @param currMapJoinOp
- * current map join operator
- */
- public void setCurrMapJoinOp(AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp) {
- this.currMapJoinOp = currMapJoinOp;
- }
-
- /**
* @return current top alias
*/
public String getCurrAliasId() {
@@ -523,14 +426,6 @@
unionTaskMap.put(op, uTask);
}
- public GenMRMapJoinCtx getMapJoinCtx(AbstractMapJoinOperator<? extends MapJoinDesc> op) {
- return mapJoinTaskMap.get(op);
- }
-
- public void setMapJoinCtx(AbstractMapJoinOperator<? extends MapJoinDesc> op, GenMRMapJoinCtx mjCtx) {
- mapJoinTaskMap.put(op, mjCtx);
- }
-
/**
* Get the input set.
*/
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (working copy)
@@ -26,7 +26,6 @@
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
-import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.Task;
@@ -35,7 +34,6 @@
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
@@ -44,10 +42,8 @@
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
@@ -84,16 +80,10 @@
}
UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
- if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) {
- GenMapRedUtils.mergeMapJoinUnion(union, ctx,
- UnionProcFactory.getPositionParent(union, stack));
- }
- else {
- ctx.getMapCurrCtx().put(
- (Operator<? extends OperatorDesc>) union,
- new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(),
- ctx.getCurrAliasId()));
- }
+ ctx.getMapCurrCtx().put(
+ (Operator<? extends OperatorDesc>) union,
+ new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrTopOp(),
+ ctx.getCurrAliasId()));
// if the union is the first time seen, set current task to GenMRUnionCtx
uCtxTask = ctx.getUnionTask(union);
@@ -103,7 +93,7 @@
ctx.setUnionTask(union, uCtxTask);
}
- Task<? extends Serializable> uTask=ctx.getCurrTask();
+ Task<? extends Serializable> uTask = ctx.getCurrTask();
if (uTask.getParentTasks() == null
|| uTask.getParentTasks().isEmpty()) {
if (!ctx.getRootTasks().contains(uTask)) {
@@ -134,8 +124,9 @@
GenMRUnionCtx uCtxTask) {
ParseContext parseCtx = ctx.getParseCtx();
- TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
- parent.getSchema(), "temporarycol"));
+ TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
+ .getFieldSchemasFromRowSchema(
+ parent.getSchema(), "temporarycol"));
// generate the temporary file
Context baseCtx = parseCtx.getContext();
@@ -150,7 +141,7 @@
parent.getChildOperators().set(0, fs_op);
List<Operator<? extends OperatorDesc>> parentOpList =
- new ArrayList<Operator<? extends OperatorDesc>>();
+ new ArrayList<Operator<? extends OperatorDesc>>();
parentOpList.add(parent);
fs_op.setParentOperators(parentOpList);
@@ -158,7 +149,7 @@
Operator<? extends OperatorDesc> ts_op = OperatorFactory.get(
new TableScanDesc(), parent.getSchema());
List<Operator<? extends OperatorDesc>> childOpList =
- new ArrayList<Operator<? extends OperatorDesc>>();
+ new ArrayList<Operator<? extends OperatorDesc>>();
childOpList.add(child);
ts_op.setChildOperators(childOpList);
child.replaceParent(parent, ts_op);
@@ -212,27 +203,9 @@
}
}
- private void processSubQueryUnionMapJoin(GenMRProcContext ctx) {
- AbstractMapJoinOperator<? extends MapJoinDesc> mjOp = ctx.getCurrMapJoinOp();
- assert mjOp != null;
- GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mjOp);
- assert mjCtx != null;
- MapredWork plan = (MapredWork) ctx.getCurrTask().getWork();
-
- String taskTmpDir = mjCtx.getTaskTmpDir();
- TableDesc tt_desc = mjCtx.getTTDesc();
- assert plan.getPathToAliases().get(taskTmpDir) == null;
- plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
- plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
- plan.getPathToPartitionInfo().put(taskTmpDir,
- new PartitionDesc(tt_desc, null));
- plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp());
- }
-
/**
* Union Operator encountered . Currently, the algorithm is pretty simple: If
- * all the sub-queries are map-only, don't do anything. However, if there is a
- * mapjoin followed by the union, merge at the union Otherwise, insert a
+ * all the sub-queries are map-only, don't do anything. Otherwise, insert a
* FileSink on top of all the sub-queries.
*
* This can be optimized later on.
@@ -284,8 +257,7 @@
}
// Copy into the current union task plan if
- if (uPrsCtx.getMapOnlySubq(pos)
- && !uPrsCtx.getMapJoinSubq(pos) && uPrsCtx.getRootTask(pos)) {
+ if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)) {
processSubQueryUnionMerge(ctx, uCtxTask, union, stack);
}
// If it a map-reduce job, create a temporary file
@@ -295,13 +267,10 @@
&& (!ctx.getRootTasks().contains(currTask))) {
ctx.getRootTasks().add(currTask);
}
- // If there is a mapjoin at position 'pos'
- if (uPrsCtx.getMapJoinSubq(pos)) {
- processSubQueryUnionMapJoin(ctx);
- }
- processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos), union, uTask, ctx, uCtxTask);
- //the currAliasId and CurrTopOp is not valid any more
+ processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos), union, uTask,
+ ctx, uCtxTask);
+ // the currAliasId and CurrTopOp are not valid any more
ctx.setCurrAliasId(null);
ctx.setCurrTopOp(null);
ctx.getOpTaskMap().put(null, uTask);
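The GenMRUnion1 path above still relies on processSubQueryUnionCreateIntermediate to break a map-reduce sub-query away from the union: it places a FileSink under the sub-query's last operator and starts the union's task from a fresh TableScan over that temporary file. The splice itself is just pointer surgery on the parent/child lists; below is a toy version of that rewiring, where Node, the FS/TS names, and the temporary path are illustrative stand-ins rather than Hive operators.

import java.util.ArrayList;
import java.util.List;

public class SpliceIntermediate {
    // Minimal operator node with parent/child links (illustrative only).
    static class Node {
        final String name;
        final List<Node> parents = new ArrayList<>();
        final List<Node> children = new ArrayList<>();
        Node(String name) { this.name = name; }
    }

    // Cut the parent -> child edge and route it through a FileSink (ending the
    // first task) and a TableScan over the intermediate file (starting the next).
    static void splice(Node parent, Node child, String tmpDir) {
        Node fileSink = new Node("FS[" + tmpDir + "]");
        Node tableScan = new Node("TS[" + tmpDir + "]");

        // the parent branch now ends in the file sink
        parent.children.set(parent.children.indexOf(child), fileSink);
        fileSink.parents.add(parent);

        // the child now reads the intermediate file through the table scan
        tableScan.children.add(child);
        child.parents.set(child.parents.indexOf(parent), tableScan);
    }

    public static void main(String[] args) {
        Node sel = new Node("SEL");
        Node union = new Node("UNION");
        sel.children.add(union);
        union.parents.add(sel);
        splice(sel, union, "/tmp/mr-intermediate");   // hypothetical temp dir
        System.out.println(sel.children.get(0).name + " / " + union.parents.get(0).name);
    }
}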
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink1.java (working copy)
@@ -81,7 +81,7 @@
} else {
// This will happen in case of joins. The current plan can be thrown away
// after being merged with the original plan
- GenMapRedUtils.joinPlan(op, null, opMapTask, ctx, -1, false, false, null);
+ GenMapRedUtils.joinPlan(op, null, opMapTask, ctx, -1, false);
currTask = opMapTask;
ctx.setCurrTask(currTask);
}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy)
@@ -20,7 +20,6 @@
import java.io.Serializable;
import java.util.ArrayList;
-import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
@@ -33,12 +32,10 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
-import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -47,19 +44,15 @@
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
-import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
-import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
-import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext;
import org.apache.hadoop.hive.ql.parse.OpParseContext;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
@@ -97,12 +90,12 @@
throws SemanticException {
Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx =
- opProcCtx.getMapCurrCtx();
+ opProcCtx.getMapCurrCtx();
GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
MapredWork plan = (MapredWork) currTask.getWork();
HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
- opProcCtx.getOpTaskMap();
+ opProcCtx.getOpTaskMap();
Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp();
opTaskMap.put(reducer, currTask);
@@ -114,7 +107,7 @@
List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
if (!rootTasks.contains(currTask)) {
- rootTasks.add(currTask);
+ rootTasks.add(currTask);
}
if (reducer.getClass() == JoinOperator.class) {
plan.setNeedsTagging(true);
@@ -137,167 +130,8 @@
opProcCtx.setCurrAliasId(currAliasId);
}
- public static void initMapJoinPlan(
- Operator extends OperatorDesc> op, GenMRProcContext ctx,
- boolean readInputMapJoin, UnionOperator currUnionOp, boolean setReducer, int pos)
- throws SemanticException {
- initMapJoinPlan(op, ctx, readInputMapJoin, currUnionOp, setReducer, pos, false);
- }
/**
- * Initialize the current plan by adding it to root tasks.
- *
- * @param op
- * the map join operator encountered
- * @param opProcCtx
- * processing context
- * @param pos
- * position of the parent
- */
- public static void initMapJoinPlan(Operator extends OperatorDesc> op,
- GenMRProcContext opProcCtx, boolean readInputMapJoin,
- UnionOperator currUnionOp, boolean setReducer, int pos, boolean createLocalPlan)
- throws SemanticException {
- Map, GenMapRedCtx> mapCurrCtx =
- opProcCtx.getMapCurrCtx();
- assert (((pos == -1) && (readInputMapJoin)) || (pos != -1));
- int parentPos = (pos == -1) ? 0 : pos;
- GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(
- parentPos));
- Task extends Serializable> currTask = mapredCtx.getCurrTask();
- MapredWork plan = (MapredWork) currTask.getWork();
- HashMap, Task extends Serializable>> opTaskMap =
- opProcCtx.getOpTaskMap();
- Operator extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp();
-
- // The mapjoin has already been encountered. Some context must be stored
- // about that
- if (readInputMapJoin) {
- AbstractMapJoinOperator extends MapJoinDesc> currMapJoinOp = opProcCtx.getCurrMapJoinOp();
- assert currMapJoinOp != null;
- boolean local = ((pos == -1) || (pos == (currMapJoinOp.getConf()).getPosBigTable())) ?
- false : true;
-
- if (setReducer) {
- Operator extends OperatorDesc> reducer = op.getChildOperators().get(0);
- plan.setReducer(reducer);
- opTaskMap.put(reducer, currTask);
- if (reducer.getClass() == JoinOperator.class) {
- plan.setNeedsTagging(true);
- }
- ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();
- plan.setNumReduceTasks(desc.getNumReducers());
- } else {
- opTaskMap.put(op, currTask);
- }
-
- if (currUnionOp == null) {
- GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(currMapJoinOp);
- String taskTmpDir;
- TableDesc tt_desc;
- Operator extends OperatorDesc> rootOp;
-
- if (mjCtx.getOldMapJoin() == null || setReducer) {
- taskTmpDir = mjCtx.getTaskTmpDir();
- tt_desc = mjCtx.getTTDesc();
- rootOp = mjCtx.getRootMapJoinOp();
- } else {
- GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(mjCtx
- .getOldMapJoin());
- taskTmpDir = oldMjCtx.getTaskTmpDir();
- tt_desc = oldMjCtx.getTTDesc();
- rootOp = oldMjCtx.getRootMapJoinOp();
- }
-
- setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc);
- setupBucketMapJoinInfo(plan, currMapJoinOp, createLocalPlan);
- } else {
- initUnionPlan(opProcCtx, currUnionOp, currTask, false);
- }
-
- opProcCtx.setCurrMapJoinOp(null);
- } else {
- MapJoinDesc desc = (MapJoinDesc) op.getConf();
-
- // The map is overloaded to keep track of mapjoins also
- opTaskMap.put(op, currTask);
-
- List> rootTasks = opProcCtx.getRootTasks();
- if (!rootTasks.contains(currTask)) {
- rootTasks.add(currTask);
- }
-
- assert currTopOp != null;
- List> seenOps = opProcCtx.getSeenOps();
- String currAliasId = opProcCtx.getCurrAliasId();
-
- seenOps.add(currTopOp);
- boolean local = (pos == desc.getPosBigTable()) ? false : true;
- setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx);
- setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator extends MapJoinDesc>)op, createLocalPlan);
- }
-
- opProcCtx.setCurrTask(currTask);
- opProcCtx.setCurrTopOp(null);
- opProcCtx.setCurrAliasId(null);
- }
-
- private static void setupBucketMapJoinInfo(MapredWork plan,
- AbstractMapJoinOperator extends MapJoinDesc> currMapJoinOp, boolean createLocalPlan) {
- if (currMapJoinOp != null) {
- Map>> aliasBucketFileNameMapping =
- currMapJoinOp.getConf().getAliasBucketFileNameMapping();
- if(aliasBucketFileNameMapping!= null) {
- MapredLocalWork localPlan = plan.getMapLocalWork();
- if(localPlan == null) {
- if(currMapJoinOp instanceof SMBMapJoinOperator) {
- localPlan = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork();
- }
- if (localPlan == null && createLocalPlan) {
- localPlan = new MapredLocalWork(
- new LinkedHashMap>(),
- new LinkedHashMap());
- }
- } else {
- //local plan is not null, we want to merge it into SMBMapJoinOperator's local work
- if(currMapJoinOp instanceof SMBMapJoinOperator) {
- MapredLocalWork smbLocalWork = ((SMBMapJoinOperator)currMapJoinOp).getConf().getLocalWork();
- if(smbLocalWork != null) {
- localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
- localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
- }
- }
- }
-
- if(localPlan == null) {
- return;
- }
-
- if(currMapJoinOp instanceof SMBMapJoinOperator) {
- plan.setMapLocalWork(null);
- ((SMBMapJoinOperator)currMapJoinOp).getConf().setLocalWork(localPlan);
- } else {
- plan.setMapLocalWork(localPlan);
- }
- BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext();
- localPlan.setBucketMapjoinContext(bucketMJCxt);
- bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
- bucketMJCxt.setBucketFileNameMapping(currMapJoinOp.getConf().getBigTableBucketNumMapping());
- localPlan.setInputFileChangeSensitive(true);
- bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias());
- bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
- bucketMJCxt.setBigTablePartSpecToFileMapping(
- currMapJoinOp.getConf().getBigTablePartSpecToFileMapping());
- // BucketizedHiveInputFormat should be used for either sort merge join or bucket map join
- if ((currMapJoinOp instanceof SMBMapJoinOperator)
- || (currMapJoinOp.getConf().isBucketMapJoin())) {
- plan.setUseBucketizedHiveInputFormat(true);
- }
- }
- }
- }
-
- /**
* Initialize the current union plan.
*
* @param op
@@ -312,7 +146,7 @@
MapredWork plan = (MapredWork) unionTask.getWork();
HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
- opProcCtx.getOpTaskMap();
+ opProcCtx.getOpTaskMap();
opTaskMap.put(reducer, unionTask);
plan.setReducer(reducer);
@@ -377,6 +211,7 @@
Task<? extends Serializable> currTask, boolean local)
throws SemanticException {
MapredWork plan = (MapredWork) currTask.getWork();
+
// In case of lateral views followed by a join, the same tree
// can be traversed more than one
if (currUnionOp != null) {
@@ -433,13 +268,6 @@
opProcCtx.setCurrTask(existingTask);
}
- public static void joinPlan(Operator<? extends OperatorDesc> op,
- Task<? extends Serializable> oldTask, Task<? extends Serializable> task,
- GenMRProcContext opProcCtx, int pos, boolean split,
- boolean readMapJoinData, UnionOperator currUnionOp) throws SemanticException {
- joinPlan(op, oldTask, task, opProcCtx, pos, split, readMapJoinData, currUnionOp, false);
- }
-
/**
* Merge the current task with the task for the current reducer.
*
@@ -456,8 +284,7 @@
*/
public static void joinPlan(Operator<? extends OperatorDesc> op,
Task<? extends Serializable> oldTask, Task<? extends Serializable> task,
- GenMRProcContext opProcCtx, int pos, boolean split,
- boolean readMapJoinData, UnionOperator currUnionOp, boolean createLocalWork)
+ GenMRProcContext opProcCtx, int pos, boolean split)
throws SemanticException {
Task<? extends Serializable> currTask = task;
MapredWork plan = (MapredWork) currTask.getWork();
@@ -493,53 +320,15 @@
: true;
}
setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx);
- if(op instanceof AbstractMapJoinOperator) {
- setupBucketMapJoinInfo(plan, (AbstractMapJoinOperator<? extends MapJoinDesc>)op, createLocalWork);
- }
}
currTopOp = null;
opProcCtx.setCurrTopOp(currTopOp);
- } else if (opProcCtx.getCurrMapJoinOp() != null) {
- AbstractMapJoinOperator extends MapJoinDesc> mjOp = opProcCtx.getCurrMapJoinOp();
- if (currUnionOp != null) {
- initUnionPlan(opProcCtx, currUnionOp, currTask, false);
- } else {
- GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp);
-
- // In case of map-join followed by map-join, the file needs to be
- // obtained from the old map join
- AbstractMapJoinOperator extends MapJoinDesc> oldMapJoin = mjCtx.getOldMapJoin();
- String taskTmpDir = null;
- TableDesc tt_desc = null;
- Operator extends OperatorDesc> rootOp = null;
-
- boolean local = ((pos == -1) || (pos == (mjOp.getConf())
- .getPosBigTable())) ? false : true;
- if (oldMapJoin == null) {
- if (opProcCtx.getParseCtx().getListMapJoinOpsNoReducer().contains(mjOp)
- || local || (oldTask != null) && (parTasks != null)) {
- taskTmpDir = mjCtx.getTaskTmpDir();
- tt_desc = mjCtx.getTTDesc();
- rootOp = mjCtx.getRootMapJoinOp();
- }
- } else {
- GenMRMapJoinCtx oldMjCtx = opProcCtx.getMapJoinCtx(oldMapJoin);
- assert oldMjCtx != null;
- taskTmpDir = oldMjCtx.getTaskTmpDir();
- tt_desc = oldMjCtx.getTTDesc();
- rootOp = oldMjCtx.getRootMapJoinOp();
- }
-
- setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc);
- setupBucketMapJoinInfo(plan, oldMapJoin, createLocalWork);
- }
- opProcCtx.setCurrMapJoinOp(null);
}
if ((oldTask != null) && (parTasks != null)) {
for (Task<? extends Serializable> parTask : parTasks) {
parTask.addDependentTask(currTask);
- if(opProcCtx.getRootTasks().contains(currTask)) {
+ if (opProcCtx.getRootTasks().contains(currTask)) {
opProcCtx.getRootTasks().remove(currTask);
}
}
@@ -557,7 +346,7 @@
* processing context
*/
public static void splitPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
- throws SemanticException {
+ throws SemanticException {
// Generate a new task
ParseContext parseCtx = opProcCtx.getParseCtx();
MapredWork cplan = getMapRedWork(parseCtx);
@@ -572,7 +361,7 @@
cplan.setNumReduceTasks(new Integer(desc.getNumReducers()));
HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
- opProcCtx.getOpTaskMap();
+ opProcCtx.getOpTaskMap();
opTaskMap.put(reducer, redTask);
Task<? extends Serializable> currTask = opProcCtx.getCurrTask();
@@ -622,7 +411,6 @@
return currentInput;
}
-
/**
* set the current task in the mapredWork.
*
@@ -657,12 +445,12 @@
if (partsList == null) {
try {
- partsList = parseCtx.getOpToPartList().get((TableScanOperator)topOp);
+ partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp);
if (partsList == null) {
partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
- parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(),
- alias_id, parseCtx.getPrunedPartitions());
- parseCtx.getOpToPartList().put((TableScanOperator)topOp, partsList);
+ parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(),
+ alias_id, parseCtx.getPrunedPartitions());
+ parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList);
}
} catch (SemanticException e) {
throw e;
@@ -701,7 +489,8 @@
long sizeNeeded = Integer.MAX_VALUE;
int fileLimit = -1;
if (parseCtx.getGlobalLimitCtx().isEnable()) {
- long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
+ long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(),
+ HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
// for the optimization that reduce number of input file, we limit number
// of files allowed. If more than specific number of files have to be
@@ -709,7 +498,7 @@
// inputs can cause unpredictable latency. It's not necessarily to be
// cheaper.
fileLimit =
- HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);
+ HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);
if (sizePerRow <= 0 || fileLimit <= 0) {
LOG.info("Skip optimization to reduce input size of 'limit'");
@@ -735,6 +524,7 @@
// partitioned table and whether any partition is selected or not
PlanUtils.addInput(inputs,
new ReadEntity(parseCtx.getTopToTable().get(topOp), parentViewInfo));
+
for (Partition part : parts) {
if (part.getTable().isPartitioned()) {
PlanUtils.addInput(inputs, new ReadEntity(part, parentViewInfo));
@@ -907,7 +697,7 @@
Operator<? extends OperatorDesc> topOp, MapredWork plan, boolean local,
TableDesc tt_desc) throws SemanticException {
- if(path == null || alias == null) {
+ if (path == null || alias == null) {
return;
}
@@ -989,8 +779,8 @@
MapredWork work = new MapredWork();
boolean mapperCannotSpanPartns =
- conf.getBoolVar(
- HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
+ conf.getBoolVar(
+ HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
work.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
@@ -1071,7 +861,7 @@
// replace the reduce child with this operator
List<Operator<? extends OperatorDesc>> childOpList = parent
- .getChildOperators();
+ .getChildOperators();
for (int pos = 0; pos < childOpList.size(); pos++) {
if (childOpList.get(pos) == op) {
childOpList.set(pos, fs_op);
@@ -1080,7 +870,7 @@
}
List<Operator<? extends OperatorDesc>> parentOpList =
- new ArrayList<Operator<? extends OperatorDesc>>();
+ new ArrayList<Operator<? extends OperatorDesc>>();
parentOpList.add(parent);
fs_op.setParentOperators(parentOpList);
@@ -1096,7 +886,7 @@
op.getParentOperators().set(posn, ts_op);
Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx =
- opProcCtx.getMapCurrCtx();
+ opProcCtx.getMapCurrCtx();
mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null, null));
String streamDesc = taskTmpDir;
@@ -1124,101 +914,12 @@
// Add the path to alias mapping
setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, local, tt_desc);
-
- // This can be cleaned up as a function table in future
- if (op instanceof AbstractMapJoinOperator<?>) {
- AbstractMapJoinOperator<? extends MapJoinDesc> mjOp = (AbstractMapJoinOperator<? extends MapJoinDesc>) op;
- opProcCtx.setCurrMapJoinOp(mjOp);
- GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp);
- if (mjCtx == null) {
- mjCtx = new GenMRMapJoinCtx(taskTmpDir, tt_desc, ts_op, null);
- } else {
- mjCtx.setTaskTmpDir(taskTmpDir);
- mjCtx.setTTDesc(tt_desc);
- mjCtx.setRootMapJoinOp(ts_op);
- }
- opProcCtx.setMapJoinCtx(mjOp, mjCtx);
- opProcCtx.getMapCurrCtx().put(parent,
- new GenMapRedCtx(childTask, null, null));
- setupBucketMapJoinInfo(cplan, mjOp, false);
- }
-
- currTopOp = null;
- String currAliasId = null;
-
- opProcCtx.setCurrTopOp(currTopOp);
- opProcCtx.setCurrAliasId(currAliasId);
+ opProcCtx.setCurrTopOp(null);
+ opProcCtx.setCurrAliasId(null);
opProcCtx.setCurrTask(childTask);
}
- public static void mergeMapJoinUnion(UnionOperator union,
- GenMRProcContext ctx, int pos) throws SemanticException {
- ParseContext parseCtx = ctx.getParseCtx();
- UnionProcContext uCtx = parseCtx.getUCtx();
-
- UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
- assert uPrsCtx != null;
-
- Task extends Serializable> currTask = ctx.getCurrTask();
-
- GenMRUnionCtx uCtxTask = ctx.getUnionTask(union);
- Task extends Serializable> uTask = null;
-
- union.getParentOperators().get(pos);
- MapredWork uPlan = null;
-
- // union is encountered for the first time
- if (uCtxTask == null) {
- uCtxTask = new GenMRUnionCtx();
- uPlan = GenMapRedUtils.getMapRedWork(parseCtx);
- uTask = TaskFactory.get(uPlan, parseCtx.getConf());
- uCtxTask.setUTask(uTask);
- ctx.setUnionTask(union, uCtxTask);
- } else {
- uTask = uCtxTask.getUTask();
- uPlan = (MapredWork) uTask.getWork();
- }
-
- // If there is a mapjoin at position 'pos'
- if (uPrsCtx.getMapJoinSubq(pos)) {
- GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(ctx.getCurrMapJoinOp());
- String taskTmpDir = mjCtx.getTaskTmpDir();
- if (uPlan.getPathToAliases().get(taskTmpDir) == null) {
- uPlan.getPathToAliases().put(taskTmpDir, new ArrayList());
- uPlan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
- uPlan.getPathToPartitionInfo().put(taskTmpDir,
- new PartitionDesc(mjCtx.getTTDesc(), null));
- uPlan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp());
- }
-
- for (Task t : currTask.getParentTasks()) {
- t.addDependentTask(uTask);
- }
- try {
- boolean notDone = true;
- while (notDone) {
- for (Task t : currTask.getParentTasks()) {
- t.removeDependentTask(currTask);
- }
- notDone = false;
- }
- } catch (ConcurrentModificationException e) {
- }
- } else {
- setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), uPlan, false, ctx);
- }
-
- ctx.setCurrTask(uTask);
- ctx.setCurrAliasId(null);
- ctx.setCurrTopOp(null);
- ctx.setCurrMapJoinOp(null);
-
- ctx.getMapCurrCtx().put(union,
- new GenMapRedCtx(ctx.getCurrTask(), null, null));
- }
-
private GenMapRedUtils() {
// prevent instantiation
}
-
}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (working copy)
@@ -18,49 +18,38 @@
package org.apache.hadoop.hive.ql.optimizer;
import java.io.Serializable;
-import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Context;
-import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.TaskFactory;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.lib.Utils;
-import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
-import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
-import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
/**
* Operator factory for MapJoin processing.
*/
public final class MapJoinFactory {
- public static int getPositionParent(AbstractMapJoinOperator<? extends MapJoinDesc> op, Stack<Node> stack) {
+ public static int getPositionParent(AbstractMapJoinOperator<? extends MapJoinDesc> op,
+ Stack<Node> stack) {
int pos = 0;
int size = stack.size();
assert size >= 2 && stack.get(size - 1) == op;
Operator<? extends OperatorDesc> parent =
- (Operator<? extends OperatorDesc>) stack.get(size - 2);
+ (Operator<? extends OperatorDesc>) stack.get(size - 2);
List<Operator<? extends OperatorDesc>> parOp = op.getParentOperators();
pos = parOp.indexOf(parent);
assert pos < parOp.size();
@@ -72,217 +61,148 @@
*/
public static class TableScanMapJoin implements NodeProcessor {
- @Override
- public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
- AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd;
- GenMRProcContext ctx = (GenMRProcContext) procCtx;
+ public static void setupBucketMapJoinInfo(MapredWork plan,
+ AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp) {
+ if (currMapJoinOp != null) {
+ Map<String, Map<String, List<String>>> aliasBucketFileNameMapping =
+ currMapJoinOp.getConf().getAliasBucketFileNameMapping();
+ if (aliasBucketFileNameMapping != null) {
+ MapredLocalWork localPlan = plan.getMapLocalWork();
+ if (localPlan == null) {
+ if (currMapJoinOp instanceof SMBMapJoinOperator) {
+ localPlan = ((SMBMapJoinOperator) currMapJoinOp).getConf().getLocalWork();
+ }
+ } else {
+ // local plan is not null, we want to merge it into SMBMapJoinOperator's local work
+ if (currMapJoinOp instanceof SMBMapJoinOperator) {
+ MapredLocalWork smbLocalWork = ((SMBMapJoinOperator) currMapJoinOp).getConf()
+ .getLocalWork();
+ if (smbLocalWork != null) {
+ localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
+ localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
+ }
+ }
+ }
- // find the branch on which this processor was invoked
- int pos = getPositionParent(mapJoin, stack);
+ if (localPlan == null) {
+ return;
+ }
- Map, GenMapRedCtx> mapCurrCtx = ctx
- .getMapCurrCtx();
- GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(
- pos));
- Task extends Serializable> currTask = mapredCtx.getCurrTask();
- MapredWork currPlan = (MapredWork) currTask.getWork();
- Operator extends OperatorDesc> currTopOp = mapredCtx.getCurrTopOp();
- String currAliasId = mapredCtx.getCurrAliasId();
- Operator extends OperatorDesc> reducer = mapJoin;
- HashMap, Task extends Serializable>> opTaskMap =
- ctx.getOpTaskMap();
- Task extends Serializable> opMapTask = opTaskMap.get(reducer);
-
- ctx.setCurrTopOp(currTopOp);
- ctx.setCurrAliasId(currAliasId);
- ctx.setCurrTask(currTask);
-
- // If the plan for this reducer does not exist, initialize the plan
- if (opMapTask == null) {
- assert currPlan.getReducer() == null;
- GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, false, null, false, pos);
- } else {
- // The current plan can be thrown away after being merged with the
- // original plan
- GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false,
- false, null);
- currTask = opMapTask;
- ctx.setCurrTask(currTask);
+ if (currMapJoinOp instanceof SMBMapJoinOperator) {
+ plan.setMapLocalWork(null);
+ ((SMBMapJoinOperator) currMapJoinOp).getConf().setLocalWork(localPlan);
+ } else {
+ plan.setMapLocalWork(localPlan);
+ }
+ BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext();
+ localPlan.setBucketMapjoinContext(bucketMJCxt);
+ bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
+ bucketMJCxt.setBucketFileNameMapping(
+ currMapJoinOp.getConf().getBigTableBucketNumMapping());
+ localPlan.setInputFileChangeSensitive(true);
+ bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias());
+ bucketMJCxt
+ .setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
+ bucketMJCxt.setBigTablePartSpecToFileMapping(
+ currMapJoinOp.getConf().getBigTablePartSpecToFileMapping());
+ // BucketizedHiveInputFormat should be used for either sort merge join or bucket map join
+ if ((currMapJoinOp instanceof SMBMapJoinOperator)
+ || (currMapJoinOp.getConf().isBucketMapJoin())) {
+ plan.setUseBucketizedHiveInputFormat(true);
+ }
+ }
}
-
- mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx
- .getCurrTopOp(), ctx.getCurrAliasId()));
- return null;
}
- }
- /**
- * ReduceSink followed by MapJoin.
- */
- public static class ReduceSinkMapJoin implements NodeProcessor {
+ /**
+ * Initialize the current plan by adding it to root tasks.
+ *
+ * @param op
+ * the map join operator encountered
+ * @param opProcCtx
+ * processing context
+ * @param pos
+ * position of the parent
+ */
+ private static void initMapJoinPlan(AbstractMapJoinOperator<? extends MapJoinDesc> op,
+ GenMRProcContext opProcCtx, int pos)
+ throws SemanticException {
+ Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx =
+ opProcCtx.getMapCurrCtx();
+ int parentPos = (pos == -1) ? 0 : pos;
+ GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(
+ parentPos));
+ Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
+ MapredWork plan = (MapredWork) currTask.getWork();
+ HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
+ opProcCtx.getOpTaskMap();
+ Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp();
- @Override
- public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
- AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd;
- GenMRProcContext opProcCtx = (GenMRProcContext) procCtx;
+ MapJoinDesc desc = (MapJoinDesc) op.getConf();
- ParseContext parseCtx = opProcCtx.getParseCtx();
- MapredWork cplan = GenMapRedUtils.getMapRedWork(parseCtx);
- Task extends Serializable> redTask = TaskFactory.get(cplan, parseCtx
- .getConf());
- Task extends Serializable> currTask = opProcCtx.getCurrTask();
+ // The map is overloaded to keep track of mapjoins also
+ opTaskMap.put(op, currTask);
- // find the branch on which this processor was invoked
- int pos = getPositionParent(mapJoin, stack);
- boolean local = (pos == ((mapJoin.getConf())).getPosBigTable()) ? false
- : true;
+ List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
+ assert (!rootTasks.contains(currTask));
+ rootTasks.add(currTask);
- GenMapRedUtils.splitTasks(mapJoin, currTask, redTask, opProcCtx, false,
- local, pos);
+ assert currTopOp != null;
+ opProcCtx.getSeenOps().add(currTopOp);
- currTask = opProcCtx.getCurrTask();
- HashMap, Task extends Serializable>> opTaskMap =
- opProcCtx.getOpTaskMap();
- Task extends Serializable> opMapTask = opTaskMap.get(mapJoin);
-
- // If the plan for this reducer does not exist, initialize the plan
- if (opMapTask == null) {
- assert cplan.getReducer() == null;
- opTaskMap.put(mapJoin, currTask);
- opProcCtx.setCurrMapJoinOp(null);
- } else {
- // The current plan can be thrown away after being merged with the
- // original plan
- GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, opProcCtx, pos,
- false, false, null);
- currTask = opMapTask;
- opProcCtx.setCurrTask(currTask);
- }
-
- return null;
+ String currAliasId = opProcCtx.getCurrAliasId();
+ boolean local = (pos == desc.getPosBigTable()) ? false : true;
+ GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx);
+ setupBucketMapJoinInfo(plan, op);
}
- }
- /**
- * MapJoin followed by Select.
- */
- public static class MapJoin implements NodeProcessor {
-
/**
- * Create a task by splitting the plan below the join. The reason, we have
- * to do so in the processing of Select and not MapJoin is due to the
- * walker. While processing a node, it is not safe to alter its children
- * because that will decide the course of the walk. It is perfectly fine to
- * muck around with its parents though, since those nodes have already been
- * visited.
+ * Merge the current task with the task for the current reducer.
+ *
+ * @param op
+ * operator being processed
+ * @param oldTask
+ * the old task for the current reducer
+ * @param task
+ * the current task for the current reducer
+ * @param opProcCtx
+ * processing context
+ * @param pos
+ * position of the parent in the stack
*/
- @Override
- public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
+ public static void joinMapJoinPlan(AbstractMapJoinOperator<? extends OperatorDesc> op,
+ Task<? extends Serializable> task,
+ GenMRProcContext opProcCtx, int pos)
+ throws SemanticException {
+ Task<? extends Serializable> currTask = task;
+ MapredWork plan = (MapredWork) currTask.getWork();
+ Operator<? extends OperatorDesc> currTopOp = opProcCtx.getCurrTopOp();
+ List<Task<? extends Serializable>> parTasks = null;
- SelectOperator sel = (SelectOperator) nd;
- AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) sel.getParentOperators().get(
- 0);
- assert sel.getParentOperators().size() == 1;
+ List<Operator<? extends OperatorDesc>> seenOps = opProcCtx.getSeenOps();
+ String currAliasId = opProcCtx.getCurrAliasId();
- GenMRProcContext ctx = (GenMRProcContext) procCtx;
- ParseContext parseCtx = ctx.getParseCtx();
-
- // is the mapjoin followed by a reducer
- List> listMapJoinOps = parseCtx
- .getListMapJoinOpsNoReducer();
-
- if (listMapJoinOps.contains(mapJoin)) {
- ctx.setCurrAliasId(null);
- ctx.setCurrTopOp(null);
- Map, GenMapRedCtx> mapCurrCtx = ctx
- .getMapCurrCtx();
- mapCurrCtx.put((Operator extends OperatorDesc>) nd, new GenMapRedCtx(
- ctx.getCurrTask(), null, null));
- return null;
+ if (!seenOps.contains(currTopOp)) {
+ seenOps.add(currTopOp);
+ boolean local = false;
+ if (pos != -1) {
+ local = (pos == ((MapJoinDesc) op.getConf()).getPosBigTable()) ? false
+ : true;
+ }
+ GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx);
+ setupBucketMapJoinInfo(plan, op);
}
-
- ctx.setCurrMapJoinOp(mapJoin);
-
- Task extends Serializable> currTask = ctx.getCurrTask();
- GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
- if (mjCtx == null) {
- mjCtx = new GenMRMapJoinCtx();
- ctx.setMapJoinCtx(mapJoin, mjCtx);
- }
-
- MapredWork mjPlan = GenMapRedUtils.getMapRedWork(parseCtx);
- Task extends Serializable> mjTask = TaskFactory.get(mjPlan, parseCtx
- .getConf());
-
- TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
- .getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol"));
-
- // generate the temporary file
- Context baseCtx = parseCtx.getContext();
- String taskTmpDir = baseCtx.getMRTmpFileURI();
-
- // Add the path to alias mapping
- mjCtx.setTaskTmpDir(taskTmpDir);
- mjCtx.setTTDesc(tt_desc);
- mjCtx.setRootMapJoinOp(sel);
-
- sel.setParentOperators(null);
-
- // Create a file sink operator for this file name
- Operator extends OperatorDesc> fs_op = OperatorFactory.get(
- new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
- HiveConf.ConfVars.COMPRESSINTERMEDIATE)), mapJoin.getSchema());
-
- assert mapJoin.getChildOperators().size() == 1;
- mapJoin.getChildOperators().set(0, fs_op);
-
- List> parentOpList =
- new ArrayList>();
- parentOpList.add(mapJoin);
- fs_op.setParentOperators(parentOpList);
-
- currTask.addDependentTask(mjTask);
-
- ctx.setCurrTask(mjTask);
- ctx.setCurrAliasId(null);
- ctx.setCurrTopOp(null);
-
- Map, GenMapRedCtx> mapCurrCtx = ctx
- .getMapCurrCtx();
- mapCurrCtx.put((Operator extends OperatorDesc>) nd, new GenMapRedCtx(
- ctx.getCurrTask(), null, null));
-
- return null;
+ currTopOp = null;
+ opProcCtx.setCurrTopOp(currTopOp);
+ opProcCtx.setCurrTask(currTask);
}
- }
- /**
- * MapJoin followed by MapJoin.
- */
- public static class MapJoinMapJoin implements NodeProcessor {
-
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
- AbstractMapJoinOperator<? extends MapJoinDesc> mapJoin =
- (AbstractMapJoinOperator<? extends MapJoinDesc>) nd;
+ Object... nodeOutputs) throws SemanticException {
+ AbstractMapJoinOperator<MapJoinDesc> mapJoin = (AbstractMapJoinOperator<MapJoinDesc>) nd;
GenMRProcContext ctx = (GenMRProcContext) procCtx;
- ctx.getParseCtx();
- AbstractMapJoinOperator extends MapJoinDesc> oldMapJoin = ctx.getCurrMapJoinOp();
-
- GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
- if (mjCtx != null) {
- mjCtx.setOldMapJoin(oldMapJoin);
- } else {
- ctx.setMapJoinCtx(mapJoin, new GenMRMapJoinCtx(null, null, null,
- oldMapJoin));
- }
- ctx.setCurrMapJoinOp(mapJoin);
-
// find the branch on which this processor was invoked
int pos = getPositionParent(mapJoin, stack);
@@ -292,97 +212,29 @@
pos));
Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
MapredWork currPlan = (MapredWork) currTask.getWork();
- mapredCtx.getCurrAliasId();
+ Operator<? extends OperatorDesc> currTopOp = mapredCtx.getCurrTopOp();
+ String currAliasId = mapredCtx.getCurrAliasId();
Operator<? extends OperatorDesc> reducer = mapJoin;
HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap =
- ctx.getOpTaskMap();
+ ctx.getOpTaskMap();
Task extends Serializable> opMapTask = opTaskMap.get(reducer);
+ ctx.setCurrTopOp(currTopOp);
+ ctx.setCurrAliasId(currAliasId);
ctx.setCurrTask(currTask);
// If the plan for this reducer does not exist, initialize the plan
if (opMapTask == null) {
assert currPlan.getReducer() == null;
- GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, null, false, pos);
+ initMapJoinPlan(mapJoin, ctx, pos);
} else {
// The current plan can be thrown away after being merged with the
// original plan
- GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, ctx, pos, false,
- true, null);
+ joinMapJoinPlan(mapJoin, opMapTask, ctx, pos);
currTask = opMapTask;
ctx.setCurrTask(currTask);
}
- mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), null, null));
- return null;
- }
- }
-
- /**
- * Union followed by MapJoin.
- */
- public static class UnionMapJoin implements NodeProcessor {
-
- @Override
- public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
- GenMRProcContext ctx = (GenMRProcContext) procCtx;
-
- ParseContext parseCtx = ctx.getParseCtx();
- UnionProcContext uCtx = parseCtx.getUCtx();
-
- // union was map only - no special processing needed
- if (uCtx.isMapOnlySubq()) {
- return (new TableScanMapJoin())
- .process(nd, stack, procCtx, nodeOutputs);
- }
-
- UnionOperator currUnion = Utils.findNode(stack, UnionOperator.class);
- assert currUnion != null;
- ctx.getUnionTask(currUnion);
- AbstractMapJoinOperator mapJoin = (AbstractMapJoinOperator) nd;
-
- // find the branch on which this processor was invoked
- int pos = getPositionParent(mapJoin, stack);
-
- Map, GenMapRedCtx> mapCurrCtx = ctx
- .getMapCurrCtx();
- GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(
- pos));
- Task extends Serializable> currTask = mapredCtx.getCurrTask();
- MapredWork currPlan = (MapredWork) currTask.getWork();
- Operator extends OperatorDesc> reducer = mapJoin;
- HashMap, Task extends Serializable>> opTaskMap =
- ctx.getOpTaskMap();
- Task extends Serializable> opMapTask = opTaskMap.get(reducer);
-
- // union result cannot be a map table
- boolean local = (pos != mapJoin.getConf().getPosBigTable());
- if (local) {
- throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_TABLE.getMsg());
- }
-
- // If the plan for this reducer does not exist, initialize the plan
- if (opMapTask == null) {
- assert currPlan.getReducer() == null;
- ctx.setCurrMapJoinOp(mapJoin);
- GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, currUnion, false, pos);
- ctx.setCurrUnionOp(null);
- } else {
- // The current plan can be thrown away after being merged with the
- // original plan
- Task extends Serializable> uTask = ctx.getUnionTask(currUnion).getUTask();
- if (uTask.getId().equals(opMapTask.getId())) {
- GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false,
- false, currUnion);
- } else {
- GenMapRedUtils.joinPlan(mapJoin, uTask, opMapTask, ctx, pos, false,
- false, currUnion);
- }
- currTask = opMapTask;
- ctx.setCurrTask(currTask);
- }
-
mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx
.getCurrTopOp(), ctx.getCurrAliasId()));
return null;
@@ -393,22 +245,6 @@
return new TableScanMapJoin();
}
- public static NodeProcessor getUnionMapJoin() {
- return new UnionMapJoin();
- }
-
- public static NodeProcessor getReduceSinkMapJoin() {
- return new ReduceSinkMapJoin();
- }
-
- public static NodeProcessor getMapJoin() {
- return new MapJoin();
- }
-
- public static NodeProcessor getMapJoinMapJoin() {
- return new MapJoinMapJoin();
- }
-
private MapJoinFactory() {
// prevent instantiation
}
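The surviving processor in MapJoinFactory, TableScanMapJoin, derives everything from the walk stack: the element directly below the map-join on the stack is the parent branch that fired the rule, its index among the operator's parents is pos, and every position other than posBigTable becomes a local (hash-table loaded) input, as in the boolean local = (pos == desc.getPosBigTable()) ? false : true lines above. A small stand-alone sketch of that positional logic follows; the Op type, the stack contents, and posBigTable are illustrative assumptions, not Hive classes.

import java.util.ArrayList;
import java.util.List;
import java.util.Stack;

public class BranchPosition {
    // Toy operator node: a name plus a parent list (stand-in, not a Hive class).
    static class Op {
        final String name;
        final List<Op> parents = new ArrayList<>();
        Op(String name) { this.name = name; }
    }

    // Mirror of the getPositionParent idea: the stack element directly below
    // the map-join is the parent branch on which the processor was invoked.
    static int positionParent(Op mapJoin, Stack<Op> stack) {
        int size = stack.size();
        assert size >= 2 && stack.get(size - 1) == mapJoin;
        Op parent = stack.get(size - 2);
        return mapJoin.parents.indexOf(parent);
    }

    public static void main(String[] args) {
        Op small = new Op("TS_small");
        Op big = new Op("TS_big");
        Op mapJoin = new Op("MAPJOIN");
        mapJoin.parents.add(small);   // position 0
        mapJoin.parents.add(big);     // position 1

        Stack<Op> stack = new Stack<>();
        stack.push(big);              // the walker came down the big-table branch
        stack.push(mapJoin);

        int pos = positionParent(mapJoin, stack);
        int posBigTable = 1;          // assumed big-table position for this example
        boolean local = pos != posBigTable;   // only non-big-table branches are loaded locally
        System.out.println("pos=" + pos + ", local=" + local);  // pos=1, local=false
    }
}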
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinResolver.java (working copy)
@@ -20,6 +20,7 @@
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.Serializable;
+import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -27,10 +28,12 @@
import java.util.Map;
import java.util.Stack;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -50,8 +53,10 @@
org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx;
import org.apache.hadoop.hive.ql.plan.ConditionalWork;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
public class CommonJoinResolver implements PhysicalPlanResolver {
@@ -71,7 +76,6 @@
return pctx;
}
-
/**
* Iterator each tasks. If this task has a local work,create a new task for this local work, named
* MapredLocalTask. then make this new generated task depends on current task's parent task, and
@@ -86,7 +90,129 @@
physicalContext = context;
}
- private ConditionalTask processCurrentTask(MapRedTask currTask,
+ // Get the position of the big table for this join operator and the given alias
+ private int getPosition(MapredWork work, Operator<? extends OperatorDesc> joinOp,
+ String alias) {
+ Operator<? extends OperatorDesc> parentOp = work.getAliasToWork().get(alias);
+
+ // reduceSinkOperator's child is null, but joinOperator's parents is reduceSink
+ while ((parentOp.getChildOperators() != null) &&
+ (!parentOp.getChildOperators().isEmpty())) {
+ parentOp = parentOp.getChildOperators().get(0);
+ }
+
+ return joinOp.getParentOperators().indexOf(parentOp);
+ }
+
+ /*
+ * A task and its child task have been converted from join to mapjoin.
+ * See if the two tasks can be merged.
+ */
+ private void mergeMapJoinTaskWithChildMapJoinTask(MapRedTask task) {
+ MapRedTask childTask = (MapRedTask)task.getChildTasks().get(0);
+ MapredWork work = task.getWork();
+ MapredLocalWork localWork = work.getMapLocalWork();
+ MapredWork childWork = childTask.getWork();
+ MapredLocalWork childLocalWork = childWork.getMapLocalWork();
+
+ // Can this be merged
+ Map<String, Operator<? extends OperatorDesc>> aliasToWork = work.getAliasToWork();
+ if (aliasToWork.size() > 1) {
+ return;
+ }
+
+ Operator<? extends OperatorDesc> op = aliasToWork.values().iterator().next();
+ while (op.getChildOperators() != null) {
+ // Don't perform this optimization for multi-table inserts
+ if (op.getChildOperators().size() > 1) {
+ return;
+ }
+ op = op.getChildOperators().get(0);
+ }
+
+ if (!(op instanceof FileSinkOperator)) {
+ return;
+ }
+
+ FileSinkOperator fop = (FileSinkOperator)op;
+ String workDir = fop.getConf().getDirName();
+
+ Map<String, ArrayList<String>> childPathToAliases = childWork.getPathToAliases();
+ if (childPathToAliases.size() > 1) {
+ return;
+ }
+
+ // The filesink writes to a different directory
+ if (!childPathToAliases.keySet().iterator().next().equals(workDir)) {
+ return;
+ }
+
+ // Neither of them should be bucketed
+ if ((localWork.getBucketMapjoinContext() != null) ||
+ (childLocalWork.getBucketMapjoinContext() != null)) {
+ return;
+ }
+
+ // Merge the trees
+ if (childWork.getAliasToWork().size() > 1) {
+ return;
+ }
+
+ Operator<? extends Serializable> childAliasOp =
+ childWork.getAliasToWork().values().iterator().next();
+ if (fop.getParentOperators().size() > 1) {
+ return;
+ }
+
+ // Merge the 2 trees - remove the FileSinkOperator from the first tree and pass it to the
+ // top of the second
+ Operator<? extends Serializable> parentFOp = fop.getParentOperators().get(0);
+ parentFOp.getChildOperators().remove(fop);
+ parentFOp.getChildOperators().add(childAliasOp);
+ List<Operator<? extends Serializable>> parentOps =
+ new ArrayList<Operator<? extends Serializable>>();
+ parentOps.add(parentFOp);
+ childAliasOp.setParentOperators(parentOps);
+
+ work.getAliasToPartnInfo().putAll(childWork.getAliasToPartnInfo());
+ for (Map.Entry<String, PartitionDesc> childWorkEntry :
+ childWork.getPathToPartitionInfo().entrySet()) {
+ if (childWork.getAliasToPartnInfo().containsValue(childWorkEntry.getKey())) {
+ work.getPathToPartitionInfo().put(childWorkEntry.getKey(), childWorkEntry.getValue());
+ }
+ }
+
+ localWork.getAliasToFetchWork().putAll(childLocalWork.getAliasToFetchWork());
+ localWork.getAliasToWork().putAll(childLocalWork.getAliasToWork());
+
+ // remove the child task
+ List<Task<? extends Serializable>> oldChildTasks = childTask.getChildTasks();
+ task.setChildTasks(oldChildTasks);
+ if (oldChildTasks != null) {
+ for (Task<? extends Serializable> oldChildTask : oldChildTasks) {
+ oldChildTask.getParentTasks().remove(childTask);
+ oldChildTask.getParentTasks().add(task);
+ }
+ }
+ }
+
+ // create map join task and set big table as bigTablePosition
+ private MapRedTask convertTaskToMapJoinTask(String xml,
+ int bigTablePosition) throws UnsupportedEncodingException, SemanticException {
+ // deep copy a new mapred work from xml
+ InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
+ MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
+ // create a mapred task for this work
+ MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
+ .getParseContext().getConf());
+ JoinOperator newJoinOp = getJoinOp(newTask);
+
+ // optimize this newWork and assume big table position is i
+ MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, bigTablePosition);
+ return newTask;
+ }
+
+ private Task<? extends Serializable> processCurrentTask(MapRedTask currTask,
ConditionalTask conditionalTask, Context context)
throws SemanticException {
@@ -98,13 +224,15 @@
currTask.setTaskTag(Task.COMMON_JOIN);
MapredWork currWork = currTask.getWork();
+
// create conditional work list and task list
List<Serializable> listWorks = new ArrayList<Serializable>();
List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
// create alias to task mapping and alias to input file mapping for resolver
HashMap<String, Task<? extends Serializable>> aliasToTask = new HashMap<String, Task<? extends Serializable>>();
- HashMap<String, ArrayList<String>> pathToAliases = currTask.getWork().getPathToAliases();
+ HashMap<String, ArrayList<String>> pathToAliases = currWork.getPathToAliases();
+ Map<String, Operator<? extends OperatorDesc>> aliasToWork = currWork.getAliasToWork();
// get parseCtx for this Join Operator
ParseContext parseCtx = physicalContext.getParseContext();
@@ -134,7 +262,7 @@
for (String alias : aliasList) {
aliasTotalKnownInputSize += size;
Long es = aliasToSize.get(alias);
- if(es == null) {
+ if (es == null) {
es = new Long(0);
}
es += size;
@@ -149,13 +277,67 @@
if (bigTableCandidates == null) {
return null;
}
+
+ Configuration conf = context.getConf();
+
+ // If all but one of the tables/partitions are smaller than the threshold, convert the
+ // join directly into a map-join and don't create a conditional task
+ boolean convertJoinMapJoin = HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.HIVECONVERTJOINAGGMAPJOIN);
+ int bigTablePosition = -1;
+ if (convertJoinMapJoin) {
+ // This is the size threshold the user has specified for a small table to fit in a mapjoin
+ long mapJoinSize = HiveConf.getLongVar(conf,
+ HiveConf.ConfVars.HIVECONVERTJOINAGGMAPJOINSIZE);
+
+ boolean bigTableFound = false;
+ for (String alias : aliasToWork.keySet()) {
+ Long size = aliasToSize.get(alias);
+ // If more than one table is bigger than the threshold (or of unknown size), fall back
+ // to a conditional task.
+ if ((size == null) || (size > mapJoinSize)) {
+ if (bigTableFound) {
+ convertJoinMapJoin = false;
+ break;
+ }
+ bigTableFound = true;
+ bigTablePosition = getPosition(currWork, joinOp, alias);
+ }
+ }
+ }
+
+ String bigTableAlias = null;
currWork.setOpParseCtxMap(parseCtx.getOpParseCtx());
currWork.setJoinTree(joinTree);
-
String xml = currWork.toXML();
- String bigTableAlias = null;
- long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(context.getConf(),
+ if (convertJoinMapJoin) {
+ // If all the tables are small enough to fit within the mapjoin threshold, pick one
+ // of them arbitrarily as the big table
+ if (bigTablePosition < 0) {
+ bigTablePosition = getPosition(currWork, joinOp,
+ aliasToWork.keySet().iterator().next());
+ }
+
+ // create map join task and set big table as bigTablePosition
+ MapRedTask newTask = convertTaskToMapJoinTask(xml, bigTablePosition);
+
+ newTask.setTaskTag(Task.MAPJOIN_ONLY_NOBACKUP);
+ replaceTask(currTask, newTask, physicalContext);
+
+ // Can this task be merged with the child task? This can happen if a big table is being
+ // joined with multiple small tables on different keys.
+ // Further optimizations are possible here: a join which has been converted to a mapjoin
+ // followed by another mapjoin can be performed in a single MR job.
+ if ((newTask.getChildTasks() != null) && (newTask.getChildTasks().size() == 1)
+ && (newTask.getChildTasks().get(0).getTaskTag() == Task.MAPJOIN_ONLY_NOBACKUP)) {
+ mergeMapJoinTaskWithChildMapJoinTask(newTask);
+ }
+
+ return newTask;
+ }
+
+ long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(conf,
HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);
for (int i = 0; i < numAliases; i++) {
// this table cannot be big table
@@ -164,17 +346,8 @@
}
// create map join task and set big table as i
- // deep copy a new mapred work from xml
- InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
- MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
- // create a mapred task for this work
- MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
- .getParseContext().getConf());
- JoinOperator newJoinOp = getJoinOp(newTask);
+ MapRedTask newTask = convertTaskToMapJoinTask(xml, i);
- // optimize this newWork and assume big table position is i
- bigTableAlias = MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, i);
-
Long aliasKnownSize = aliasToSize.get(bigTableAlias);
if (aliasKnownSize != null && aliasKnownSize.longValue() > 0) {
long smallTblTotalKnownSize = aliasTotalKnownInputSize
@@ -186,7 +359,7 @@
}
// add into conditional task
- listWorks.add(newWork);
+ listWorks.add(newTask.getWork());
listTasks.add(newTask);
newTask.setTaskTag(Task.CONVERTED_MAPJOIN);
@@ -263,6 +436,41 @@
}
}
+ // Replace the task with the new task. Copy the children and parents of the old
+ // task to the new task.
+ private void replaceTask(
+ Task<? extends Serializable> currTask, Task<? extends Serializable> newTask,
+ PhysicalContext physicalContext) {
+ // add this task into task tree
+ // set all parent tasks
+ List<Task<? extends Serializable>> parentTasks = currTask.getParentTasks();
+ currTask.setParentTasks(null);
+ if (parentTasks != null) {
+ for (Task<? extends Serializable> tsk : parentTasks) {
+ // make the newly generated task depend on all the parent tasks of the current task.
+ tsk.addDependentTask(newTask);
+ // remove the current task from its original parent task's dependent tasks
+ tsk.removeDependentTask(currTask);
+ }
+ } else {
+ // remove the current task from the root tasks and add the new task in its place
+ physicalContext.removeFromRootTask(currTask);
+ physicalContext.addToRootTask(newTask);
+ }
+
+ // set all child tasks
+ List<Task<? extends Serializable>> oldChildTasks = currTask.getChildTasks();
+ currTask.setChildTasks(null);
+ if (oldChildTasks != null) {
+ for (Task<? extends Serializable> tsk : oldChildTasks) {
+ // make all the child tasks of the current task depend on the newly generated task.
+ newTask.addDependentTask(tsk);
+ // remove the current task from its original child task's parent tasks
+ tsk.getParentTasks().remove(currTask);
+ }
+ }
+ }
+
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
throws SemanticException {
@@ -280,14 +488,15 @@
List<Task<? extends Serializable>> taskList = ((ConditionalTask) currTask).getListTasks();
for (Task<? extends Serializable> tsk : taskList) {
if (tsk.isMapRedTask()) {
- ConditionalTask cndTask = this.processCurrentTask((MapRedTask) tsk,
+ Task<? extends Serializable> newTask = this.processCurrentTask((MapRedTask) tsk,
((ConditionalTask) currTask), physicalContext.getContext());
- walkerCtx.addToDispatchList(cndTask);
+ walkerCtx.addToDispatchList(newTask);
}
}
} else {
- ConditionalTask cndTask = this.processCurrentTask((MapRedTask) currTask, null, physicalContext.getContext());
- walkerCtx.addToDispatchList(cndTask);
+ Task<? extends Serializable> newTask =
+ this.processCurrentTask((MapRedTask) currTask, null, physicalContext.getContext());
+ walkerCtx.addToDispatchList(newTask);
}
}
return null;
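The two settings consulted by processCurrentTask() above can also be toggled programmatically. A minimal sketch, assuming only the patched HiveConf (the new ConfVars are the ones added in HiveConf.java by this change); the class name is illustrative and not part of the patch:

  import org.apache.hadoop.hive.conf.HiveConf;

  public class AggressiveMapJoinConfExample {
    public static void main(String[] args) {
      HiveConf conf = new HiveConf();
      // Enable the aggressive conversion and set the per-table size threshold.
      // With at most one alias above this size, processCurrentTask() emits a single
      // MapRedTask tagged MAPJOIN_ONLY_NOBACKUP instead of a ConditionalTask.
      conf.setBoolVar(HiveConf.ConfVars.HIVECONVERTJOINAGGMAPJOIN, true);
      conf.setLongVar(HiveConf.ConfVars.HIVECONVERTJOINAGGMAPJOINSIZE, 10000L);
      System.out.println(conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOINAGGMAPJOIN));
    }
  }
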
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRRedSink2.java (working copy)
@@ -71,7 +71,7 @@
if (opMapTask == null) {
GenMapRedUtils.splitPlan(op, ctx);
} else {
- GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true, false, null);
+ GenMapRedUtils.joinPlan(op, currTask, opMapTask, ctx, -1, true);
currTask = opMapTask;
ctx.setCurrTask(currTask);
}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (working copy)
@@ -32,12 +32,10 @@
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
-import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
import org.apache.hadoop.hive.ql.exec.MoveTask;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -52,7 +50,6 @@
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
@@ -67,7 +64,6 @@
import org.apache.hadoop.hive.ql.plan.ExtractDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -774,13 +770,7 @@
private String processFS(Node nd, Stack<Node> stack,
NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException {
- // Is it the dummy file sink after the mapjoin
FileSinkOperator fsOp = (FileSinkOperator) nd;
- if ((fsOp.getParentOperators().size() == 1)
- && (fsOp.getParentOperators().get(0) instanceof MapJoinOperator)) {
- return null;
- }
-
GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
List<FileSinkOperator> seenFSOps = ctx.getSeenFileSinkOps();
if (seenFSOps == null) {
@@ -884,24 +874,6 @@
return dest;
}
- AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp = ctx.getCurrMapJoinOp();
-
- if (currMapJoinOp != null) {
- opTaskMap.put(null, currTask);
- GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(currMapJoinOp);
- MapredWork plan = (MapredWork) currTask.getWork();
-
- String taskTmpDir = mjCtx.getTaskTmpDir();
- TableDesc tt_desc = mjCtx.getTTDesc();
- assert plan.getPathToAliases().get(taskTmpDir) == null;
- plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
- plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
- plan.getPathToPartitionInfo().put(taskTmpDir,
- new PartitionDesc(tt_desc, null));
- plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp());
- return dest;
- }
-
return dest;
}
}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (working copy)
@@ -75,9 +75,9 @@
public static final int CONVERTED_MAPJOIN = 2;
public static final int CONVERTED_LOCAL_MAPJOIN = 3;
public static final int BACKUP_COMMON_JOIN = 4;
- public static final int LOCAL_MAPJOIN=5;
+ public static final int LOCAL_MAPJOIN = 5;
+ public static final int MAPJOIN_ONLY_NOBACKUP = 6;
-
// Descendants tasks who subscribe feeds from this task
protected transient List<Task<? extends Serializable>> feedSubscribers;
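With the new MAPJOIN_ONLY_NOBACKUP tag, a plan produced by the aggressive conversion can be told apart from one produced by the conditional resolver simply by inspecting the task tag. A minimal sketch, assuming the patched Task constants above (the helper class and method names are illustrative only):

  import java.io.Serializable;
  import org.apache.hadoop.hive.ql.exec.Task;

  public final class JoinPlanShape {
    private JoinPlanShape() {
    }

    // Illustrative helper: report which join strategy a task was tagged with.
    public static String describe(Task<? extends Serializable> task) {
      switch (task.getTaskTag()) {
        case Task.MAPJOIN_ONLY_NOBACKUP:
          return "map-join only, no backup common join";
        case Task.CONVERTED_MAPJOIN:
          return "map-join chosen via the conditional task";
        case Task.BACKUP_COMMON_JOIN:
          return "backup common join";
        case Task.COMMON_JOIN:
          return "common (shuffle) join";
        default:
          return "other";
      }
    }
  }
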
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (working copy)
@@ -148,4 +148,14 @@
public OperatorType getType() {
return OperatorType.UNION;
}
+
+ @Override
+ public boolean opAllowedBeforeMapJoin() {
+ return false;
+ }
+
+ @Override
+ public boolean opAllowedAfterMapJoin() {
+ return false;
+ }
}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (working copy)
@@ -1429,4 +1429,22 @@
public boolean supportUnionRemoveOptimization() {
return false;
}
+
+ /*
+ * Whether this operator is allowed before a mapjoin. Eventually, the mapjoin hint should be
+ * done away with, but since bucketized mapjoin and sortmerge join depend on it completely,
+ * it is still needed. Subclasses override this to restrict which operators are allowed
+ * before a mapjoin.
+ */
+ public boolean opAllowedBeforeMapJoin() {
+ return true;
+ }
+
+ /*
+ * Whether this operator is allowed after a mapjoin. Eventually, the mapjoin hint should be
+ * done away with, but since bucketized mapjoin and sortmerge join depend on it completely,
+ * it is still needed. Subclasses override this to restrict which operators are allowed
+ * after a mapjoin.
+ */
+ public boolean opAllowedAfterMapJoin() {
+ return true;
+ }
}
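The two hooks added to Operator above are meant to be consulted when deciding whether a mapjoin is legal at a given point in the operator tree; UnionOperator above and ReduceSinkOperator/CommonJoinOperator further down override them to return false. The checker below is only a sketch under that assumption (the class and method names are made up, and whether the real validation walks the whole tree or only immediate neighbours is not shown in this diff); the Operator accessors and the ErrorMsg entry are the ones appearing in this patch:

  import java.util.List;
  import org.apache.hadoop.hive.ql.ErrorMsg;
  import org.apache.hadoop.hive.ql.exec.Operator;
  import org.apache.hadoop.hive.ql.parse.SemanticException;
  import org.apache.hadoop.hive.ql.plan.OperatorDesc;

  public final class MapJoinHintChecker {
    private MapJoinHintChecker() {
    }

    // Sketch: reject a mapjoin if any immediate parent or child operator
    // vetoes it via opAllowedBeforeMapJoin()/opAllowedAfterMapJoin().
    public static void check(Operator<? extends OperatorDesc> mapJoinOp)
        throws SemanticException {
      List<Operator<? extends OperatorDesc>> parents = mapJoinOp.getParentOperators();
      if (parents != null) {
        for (Operator<? extends OperatorDesc> parent : parents) {
          if (!parent.opAllowedBeforeMapJoin()) {
            throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN.getMsg());
          }
        }
      }
      List<Operator<? extends OperatorDesc>> children = mapJoinOp.getChildOperators();
      if (children != null) {
        for (Operator<? extends OperatorDesc> child : children) {
          if (!child.opAllowedAfterMapJoin()) {
            throw new SemanticException(ErrorMsg.OPERATOR_NOT_ALLOWED_WITH_MAPJOIN.getMsg());
          }
        }
      }
    }
  }
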
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (working copy)
@@ -321,4 +321,9 @@
public OperatorType getType() {
return OperatorType.REDUCESINK;
}
+
+ @Override
+ public boolean opAllowedBeforeMapJoin() {
+ return false;
+ }
}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (working copy)
@@ -934,4 +934,13 @@
this.posToAliasMap = posToAliasMap;
}
+ @Override
+ public boolean opAllowedBeforeMapJoin() {
+ return false;
+ }
+
+ @Override
+ public boolean opAllowedAfterMapJoin() {
+ return false;
+ }
}
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java (working copy)
@@ -19,15 +19,11 @@
import java.io.Serializable;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.Collections;
-import java.util.Comparator;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -51,7 +47,7 @@
HashMap<String, ArrayList<String>> pathToAliases;
HashMap<String, Long> aliasToKnownSize;
private Task<? extends Serializable> commonJoinTask;
-
+
private String localTmpDir;
private String hdfsTmpDir;
@@ -155,7 +151,7 @@
return (int)(size - o.size);
}
}
-
+
private String resolveMapJoinTask(
HashMap<String, ArrayList<String>> pathToAliases,
HashMap<String, Task<? extends Serializable>> aliasToTask,
@@ -164,14 +160,14 @@
String bigTableFileAlias = null;
long smallTablesFileSizeSum = 0;
-
+
Map<String, AliasFileSizePair> aliasToFileSizeMap = new HashMap<String, AliasFileSizePair>();
for (Map.Entry<String, Long> entry : aliasToKnownSize.entrySet()) {
String alias = entry.getKey();
AliasFileSizePair pair = new AliasFileSizePair(alias, entry.getValue());
aliasToFileSizeMap.put(alias, pair);
}
-
+
try {
// need to compute the input size at runtime, and select the biggest as
// the big table.
@@ -199,7 +195,7 @@
}
// generate file size to alias mapping; but not set file size as key,
// because different file may have the same file size.
-
+
List<AliasFileSizePair> aliasFileSizeList = new ArrayList<AliasFileSizePair>(
aliasToFileSizeMap.values());
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -72,7 +72,6 @@
import org.apache.hadoop.hive.ql.exec.RecordWriter;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
@@ -107,7 +106,6 @@
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3;
-import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4;
import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1;
import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
@@ -2441,7 +2439,7 @@
boolean subQuery = qb.getParseInfo().getIsSubQ();
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
- : getUnescapedName((ASTNode)expr.getChild(0)).toLowerCase(),
+ : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
expr, col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery);
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
@@ -2455,7 +2453,7 @@
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
- .getChild(0).getText().toLowerCase())) && !hasAsClause
+ .getChild(0).getText().toLowerCase())) && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(1).getText()))) {
// In case the expression is TABLE.COL (col can be regex).
@@ -2463,7 +2461,7 @@
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(expr.getChild(0).getChild(0).getText()
- .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch,
+ .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch,
qb.getAliases(), subQuery);
} else {
// Case when this is an expression
@@ -5113,7 +5111,7 @@
// set the stats publishing/aggregating key prefix
// the same as directory name. The directory name
- // can be changed in the optimizer but the key should not be changed
+ // can be changed in the optimizer but the key should not be changed
// it should be the same as the MoveWork's sourceDir.
fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName());
@@ -8087,24 +8085,9 @@
opRules.put(new RuleRegExp(new String("R6"),
UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"),
new GenMRRedSink3());
- opRules.put(new RuleRegExp(new String("R6"),
- MapJoinOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"),
- new GenMRRedSink4());
opRules.put(new RuleRegExp(new String("R7"),
- TableScanOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
+ MapJoinOperator.getOperatorName() + "%"),
MapJoinFactory.getTableScanMapJoin());
- opRules.put(new RuleRegExp(new String("R8"),
- ReduceSinkOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
- MapJoinFactory.getReduceSinkMapJoin());
- opRules.put(new RuleRegExp(new String("R9"),
- UnionOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
- MapJoinFactory.getUnionMapJoin());
- opRules.put(new RuleRegExp(new String("R10"),
- MapJoinOperator.getOperatorName() + "%.*" + MapJoinOperator.getOperatorName() + "%"),
- MapJoinFactory.getMapJoinMapJoin());
- opRules.put(new RuleRegExp(new String("R11"),
- MapJoinOperator.getOperatorName() + "%" + SelectOperator.getOperatorName() + "%"),
- MapJoinFactory.getMapJoin());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Index: ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (revision 1438313)
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (working copy)
@@ -324,6 +324,9 @@
"(higher than the number of rows per input row due to grouping sets in the query), or " +
"rewrite the query to not use distincts."),
+ OPERATOR_NOT_ALLOWED_WITH_MAPJOIN(10227,
+ "All operators are not allowed with mapjoin hint. Remove the mapjoin hint."),
+
SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "
+ "It may have crashed with an error."),