Index: build.xml
===================================================================
--- build.xml (revision 1467218)
+++ build.xml (working copy)
@@ -136,7 +136,6 @@
-
@@ -510,14 +509,6 @@
-
-
-
-
-
-
-
-
@@ -766,9 +756,6 @@
-
-
@@ -962,7 +949,6 @@
-
@@ -979,7 +965,6 @@
-
@@ -1081,8 +1066,6 @@
todir="${mvn.jar.dir}" />
-
-
-
-
-
@@ -1373,16 +1351,6 @@
output.file="${mvn.jar.dir}/hive-metastore-${version}.pom.asc"
gpg.passphrase="${gpg.passphrase}"/>
-
-
-
-
-
@@ -198,7 +197,6 @@
-
Index: ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out
===================================================================
--- ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out (revision 1467218)
+++ ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out (working copy)
@@ -927,11 +927,42 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-2 depends on stages: Stage-4
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ subq2:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ subq2:a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: (key < 6)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ outputColumnNames: _col0
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 0
+
+ Stage: Stage-4
Map Reduce
Alias -> Map Operator Tree:
subq1:a
@@ -946,7 +977,7 @@
expr: key
type: int
outputColumnNames: _col0
- Sorted Merge Bucket Map Join Operator
+ Map Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
@@ -964,12 +995,25 @@
bucketGroup: false
mode: hash
outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1053,14 +1097,20 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-2 depends on stages: Stage-4
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Alias -> Map Operator Tree:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
subq2:subq1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ subq2:subq1:a
TableScan
alias: a
Filter Operator
@@ -1072,9 +1122,7 @@
expr: key
type: int
outputColumnNames: _col0
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
+ HashTable Sink Operator
condition expressions:
0
1
@@ -1082,20 +1130,51 @@
keys:
0 [Column[_col0]]
1 [Column[key]]
- Position of Big Table: 0
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Position of Big Table: 1
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ b
+ TableScan
+ alias: b
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1203,14 +1282,45 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq4) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL subq4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-2 depends on stages: Stage-4
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ subq2:subq1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ subq2:subq1:a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: ((key < 8) and (key < 6))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ outputColumnNames: _col0
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 1
+
+ Stage: Stage-4
Map Reduce
Alias -> Map Operator Tree:
- subq2:subq1:a
+ subq4:subq3:a
TableScan
alias: a
Filter Operator
@@ -1222,7 +1332,7 @@
expr: key
type: int
outputColumnNames: _col0
- Sorted Merge Bucket Map Join Operator
+ Map Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
@@ -1232,7 +1342,7 @@
keys:
0 [Column[_col0]]
1 [Column[_col0]]
- Position of Big Table: 0
+ Position of Big Table: 1
Select Operator
Group By Operator
aggregations:
@@ -1240,12 +1350,25 @@
bucketGroup: false
mode: hash
outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1343,11 +1466,42 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-2 depends on stages: Stage-4
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ subq2:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ subq2:a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: (key < 8)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ outputColumnNames: _col0
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ Position of Big Table: 0
+
+ Stage: Stage-4
Map Reduce
Alias -> Map Operator Tree:
subq1:a
@@ -1362,7 +1516,7 @@
expr: key
type: int
outputColumnNames: _col0
- Sorted Merge Bucket Map Join Operator
+ Map Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
@@ -1380,12 +1534,25 @@
bucketGroup: false
mode: hash
outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1607,11 +1774,33 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_TABREF (TOK_TABNAME tbl2) a) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-2 depends on stages: Stage-4
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+
+ Stage: Stage-4
Map Reduce
Alias -> Map Operator Tree:
subq1:a
@@ -1626,7 +1815,7 @@
expr: key
type: int
outputColumnNames: _col0
- Sorted Merge Bucket Map Join Operator
+ Map Join Operator
condition map:
Inner Join 0 to 1
condition expressions:
@@ -1644,12 +1833,25 @@
bucketGroup: false
mode: hash
outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1717,19 +1919,23 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-2 depends on stages: Stage-4
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Alias -> Map Operator Tree:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
TableScan
alias: a
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
+ HashTable Sink Operator
condition expressions:
0
1
@@ -1737,20 +1943,60 @@
keys:
0 [Column[key]]
1 [Column[_col0]]
- Position of Big Table: 0
+ Position of Big Table: 1
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ subq1:a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: (key < 6)
+ type: boolean
Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ expressions:
+ expr: key
+ type: int
+ outputColumnNames: _col0
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[_col0]]
+ Position of Big Table: 1
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1828,14 +2074,73 @@
(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq3) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-2 depends on stages: Stage-5
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-6
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ subq1:a
+ Fetch Operator
+ limit: -1
+ subq2:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ subq1:a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: (key < 6)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ outputColumnNames: _col0
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ 2
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ 2 [Column[_col0]]
+ Position of Big Table: 2
+ subq2:a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: (key < 6)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ outputColumnNames: _col0
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ 2
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[_col0]]
+ 2 [Column[_col0]]
+ Position of Big Table: 2
+
+ Stage: Stage-5
Map Reduce
Alias -> Map Operator Tree:
- subq1:a
+ subq3:a
TableScan
alias: a
Filter Operator
@@ -1847,7 +2152,7 @@
expr: key
type: int
outputColumnNames: _col0
- Sorted Merge Bucket Map Join Operator
+ Map Join Operator
condition map:
Inner Join 0 to 1
Inner Join 0 to 2
@@ -1860,7 +2165,7 @@
0 [Column[_col0]]
1 [Column[_col0]]
2 [Column[_col0]]
- Position of Big Table: 0
+ Position of Big Table: 2
Select Operator
Group By Operator
aggregations:
@@ -1868,12 +2173,25 @@
bucketGroup: false
mode: hash
outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1965,14 +2283,20 @@
(TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value2)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-2 depends on stages: Stage-4
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Alias -> Map Operator Tree:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
a:subq2:subq1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a:subq2:subq1:a
TableScan
alias: a
Filter Operator
@@ -1984,9 +2308,7 @@
expr: key
type: int
outputColumnNames: _col0
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
+ HashTable Sink Operator
condition expressions:
0
1
@@ -1994,20 +2316,51 @@
keys:
0 [Column[_col0]]
1 [Column[key]]
- Position of Big Table: 0
- Select Operator
- Group By Operator
- aggregations:
- expr: count()
- bucketGroup: false
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Position of Big Table: 1
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a:b
+ TableScan
+ alias: b
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
Index: ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
===================================================================
--- ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q (revision 0)
+++ ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q (working copy)
@@ -0,0 +1,36 @@
+-- small 1 part, 2 bucket & big 2 part, 4 bucket
+
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08');
+load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08');
+
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
+load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
+
+load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09');
+load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09');
+
+set hive.auto.convert.join=true;
+
+explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin=true;
+set hive.optimize.bucketmapjoin.sortedmerge=true;
+
+-- Since size is being used to find the big table, the order of the tables in the join does not matter
+-- The tables are only bucketed and not sorted, the join should not be converted
+-- Currently, a join is only converted to a sort-merge join without a hint, automatic conversion to
+-- bucketized mapjoin is not done
+explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+
+-- The join is converted to a bucketed mapjoin with a mapjoin hint
+explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
+select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java (revision 1467218)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java (working copy)
@@ -390,11 +390,10 @@
// Can the join operator be converted to a sort-merge join operator ?
// It is already verified that the join can be converted to a bucket map join
protected boolean checkConvertJoinToSMBJoin(
- JoinOperator joinOperator,
- SortBucketJoinProcCtx smbJoinContext,
- ParseContext pGraphContext) throws SemanticException {
+ JoinOperator joinOperator,
+ SortBucketJoinProcCtx smbJoinContext,
+ ParseContext pGraphContext) throws SemanticException {
- boolean tableEligibleForBucketedSortMergeJoin = true;
QBJoinTree joinCtx = pGraphContext.getJoinContext().get(joinOperator);
if (joinCtx == null) {
@@ -409,14 +408,15 @@
List sortColumnsFirstTable = new ArrayList();
for (int pos = 0; pos < srcs.length; pos++) {
- tableEligibleForBucketedSortMergeJoin = tableEligibleForBucketedSortMergeJoin &&
- isEligibleForBucketSortMergeJoin(smbJoinContext,
- pGraphContext,
- smbJoinContext.getKeyExprMap().get((byte)pos),
- joinCtx,
- srcs,
- pos,
- sortColumnsFirstTable);
+ if (!isEligibleForBucketSortMergeJoin(smbJoinContext,
+ pGraphContext,
+ smbJoinContext.getKeyExprMap().get((byte) pos),
+ joinCtx,
+ srcs,
+ pos,
+ sortColumnsFirstTable)) {
+ return false;
+ }
}
smbJoinContext.setSrcs(srcs);
@@ -489,9 +489,10 @@
}
context.setKeyExprMap(keyExprMap);
- String[] srcs = joinCtx.getBaseSrc();
- for (int srcPos = 0; srcPos < srcs.length; srcPos++) {
- srcs[srcPos] = QB.getAppendedAliasFromId(joinCtx.getId(), srcs[srcPos]);
+ String[] joinSrcs = joinCtx.getBaseSrc();
+ String[] srcs = new String[joinSrcs.length];
+ for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) {
+ srcs[srcPos] = new String(QB.getAppendedAliasFromId(joinCtx.getId(), joinSrcs[srcPos]));
}
// Given a candidate map-join, can this join be converted.