Index: build.xml =================================================================== --- build.xml (revision 1467218) +++ build.xml (working copy) @@ -136,7 +136,6 @@ - @@ -510,14 +509,6 @@ - - - - - - - - @@ -766,9 +756,6 @@ - - @@ -962,7 +949,6 @@ - @@ -979,7 +965,6 @@ - @@ -1081,8 +1066,6 @@ todir="${mvn.jar.dir}" /> - - - - - @@ -1373,16 +1351,6 @@ output.file="${mvn.jar.dir}/hive-metastore-${version}.pom.asc" gpg.passphrase="${gpg.passphrase}"/> - - - - - @@ -198,7 +197,6 @@ - Index: ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out (revision 1467218) +++ ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out (working copy) @@ -927,11 +927,42 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + subq2:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq2:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key < 6) + type: boolean + Select Operator + expressions: + expr: key + type: int + outputColumnNames: _col0 + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 0 + + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: subq1:a @@ -946,7 +977,7 @@ expr: key type: int outputColumnNames: _col0 - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -964,12 +995,25 @@ bucketGroup: false mode: hash outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1053,14 +1097,20 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: subq2:subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq2:subq1:a TableScan alias: a Filter Operator @@ -1072,9 +1122,7 @@ expr: key type: int outputColumnNames: _col0 - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator condition expressions: 0 1 @@ -1082,20 +1130,51 @@ keys: 0 [Column[_col0]] 1 [Column[key]] - Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Position of Big Table: 1 + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + Position of Big Table: 1 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1203,14 +1282,45 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq4) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL subq4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + subq2:subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq2:subq1:a + TableScan + alias: a + Filter Operator + predicate: + expr: ((key < 8) and (key < 6)) + type: boolean + Select Operator + expressions: + expr: key + type: int + outputColumnNames: _col0 + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: - subq2:subq1:a + subq4:subq3:a TableScan alias: a Filter Operator @@ -1222,7 +1332,7 @@ expr: key type: int outputColumnNames: _col0 - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -1232,7 +1342,7 @@ keys: 0 [Column[_col0]] 1 [Column[_col0]] - Position of Big Table: 0 + Position of Big Table: 1 Select Operator Group By Operator aggregations: @@ -1240,12 +1350,25 @@ bucketGroup: false mode: hash outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1343,11 +1466,42 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL a) value)) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + subq2:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq2:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key < 8) + type: boolean + Select Operator + expressions: + expr: key + type: int + outputColumnNames: _col0 + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 0 + + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: subq1:a @@ -1362,7 +1516,7 @@ expr: key type: int outputColumnNames: _col0 - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -1380,12 +1534,25 @@ bucketGroup: false mode: hash outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1607,11 +1774,33 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_TABREF (TOK_TABNAME tbl2) a) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL a) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: subq1:a @@ -1626,7 +1815,7 @@ expr: key type: int outputColumnNames: _col0 - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -1644,12 +1833,25 @@ bucketGroup: false mode: hash outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1717,19 +1919,23 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a TableScan alias: a - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator condition expressions: 0 1 @@ -1737,20 +1943,60 @@ keys: 0 [Column[key]] 1 [Column[_col0]] - Position of Big Table: 0 + Position of Big Table: 1 + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + subq1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key < 6) + type: boolean Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + expressions: + expr: key + type: int + outputColumnNames: _col0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[_col0]] + Position of Big Table: 1 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1828,14 +2074,73 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl2) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq3) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-5 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + subq1:a + Fetch Operator + limit: -1 + subq2:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key < 6) + type: boolean + Select Operator + expressions: + expr: key + type: int + outputColumnNames: _col0 + HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + Position of Big Table: 2 + subq2:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key < 6) + type: boolean + Select Operator + expressions: + expr: key + type: int + outputColumnNames: _col0 + HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + Position of Big Table: 2 + + Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: - subq1:a + subq3:a TableScan alias: a Filter Operator @@ -1847,7 +2152,7 @@ expr: key type: int outputColumnNames: _col0 - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 Inner Join 0 to 2 @@ -1860,7 +2165,7 @@ 0 [Column[_col0]] 1 [Column[_col0]] 2 [Column[_col0]] - Position of Big Table: 0 + Position of Big Table: 2 Select Operator Group By Operator aggregations: @@ -1868,12 +2173,25 @@ bucketGroup: false mode: hash outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: @@ -1965,14 +2283,20 @@ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tbl1) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value) value)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 8)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 6)))) subq2) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL subq2) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq2) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value) value2)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-4 Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: a:subq2:subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a:subq2:subq1:a TableScan alias: a Filter Operator @@ -1984,9 +2308,7 @@ expr: key type: int outputColumnNames: _col0 - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator condition expressions: 0 1 @@ -1994,20 +2316,51 @@ keys: 0 [Column[_col0]] 1 [Column[key]] - Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: - expr: count() - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint + Position of Big Table: 1 + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + a:b + TableScan + alias: b + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + Position of Big Table: 1 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint Reduce Operator Tree: Group By Operator aggregations: Index: ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q =================================================================== --- ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q (revision 0) +++ ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q (working copy) @@ -0,0 +1,36 @@ +-- small 1 part, 2 bucket & big 2 part, 4 bucket + +CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08'); +load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08'); + +CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08'); +load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08'); + +load data local inpath '../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); +load data local inpath '../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); + +set hive.auto.convert.join=true; + +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; + +set hive.auto.convert.sortmerge.join=true; +set hive.optimize.bucketmapjoin=true; +set hive.optimize.bucketmapjoin.sortedmerge=true; + +-- Since size is being used to find the big table, the order of the tables in the join does not matter +-- The tables are only bucketed and not sorted, the join should not be converted +-- Currenly, a join is only converted to a sort-merge join without a hint, automatic conversion to +-- bucketized mapjoin is not done +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; + +-- The join is converted to a bucketed mapjoin with a mapjoin hint +explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java (revision 1467218) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java (working copy) @@ -390,11 +390,10 @@ // Can the join operator be converted to a sort-merge join operator ? // It is already verified that the join can be converted to a bucket map join protected boolean checkConvertJoinToSMBJoin( - JoinOperator joinOperator, - SortBucketJoinProcCtx smbJoinContext, - ParseContext pGraphContext) throws SemanticException { + JoinOperator joinOperator, + SortBucketJoinProcCtx smbJoinContext, + ParseContext pGraphContext) throws SemanticException { - boolean tableEligibleForBucketedSortMergeJoin = true; QBJoinTree joinCtx = pGraphContext.getJoinContext().get(joinOperator); if (joinCtx == null) { @@ -409,14 +408,15 @@ List sortColumnsFirstTable = new ArrayList(); for (int pos = 0; pos < srcs.length; pos++) { - tableEligibleForBucketedSortMergeJoin = tableEligibleForBucketedSortMergeJoin && - isEligibleForBucketSortMergeJoin(smbJoinContext, - pGraphContext, - smbJoinContext.getKeyExprMap().get((byte)pos), - joinCtx, - srcs, - pos, - sortColumnsFirstTable); + if (!isEligibleForBucketSortMergeJoin(smbJoinContext, + pGraphContext, + smbJoinContext.getKeyExprMap().get((byte) pos), + joinCtx, + srcs, + pos, + sortColumnsFirstTable)) { + return false; + } } smbJoinContext.setSrcs(srcs); @@ -489,9 +489,10 @@ } context.setKeyExprMap(keyExprMap); - String[] srcs = joinCtx.getBaseSrc(); - for (int srcPos = 0; srcPos < srcs.length; srcPos++) { - srcs[srcPos] = QB.getAppendedAliasFromId(joinCtx.getId(), srcs[srcPos]); + String[] joinSrcs = joinCtx.getBaseSrc(); + String[] srcs = new String[joinSrcs.length]; + for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) { + srcs[srcPos] = new String(QB.getAppendedAliasFromId(joinCtx.getId(), joinSrcs[srcPos])); } // Given a candidate map-join, can this join be converted.