Index: ql/src/test/results/clientpositive/smb_mapjoin_17.q.out =================================================================== --- ql/src/test/results/clientpositive/smb_mapjoin_17.q.out (revision 0) +++ ql/src/test/results/clientpositive/smb_mapjoin_17.q.out (working copy) @@ -0,0 +1,814 @@ +PREHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table2 +PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table3 +PREHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table4 +PREHOOK: query: CREATE TABLE test_table5 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table5 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table5 +PREHOOK: query: CREATE TABLE test_table6 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table6 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table6 +PREHOOK: query: CREATE TABLE test_table7 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table7 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table7 +PREHOOK: query: CREATE TABLE test_table8 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_table8 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_table8 +PREHOOK: query: INSERT OVERWRITE TABLE test_table1 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table1 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table2 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table2 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table3 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table3 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table3 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table4 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table4 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table4 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table4 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table5 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table5 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table5 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table5 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table6 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table6 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table6 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table6 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table7 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table7 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table7 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table7 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE test_table8 +SELECT * FROM src WHERE key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table8 +POSTHOOK: query: INSERT OVERWRITE TABLE test_table8 +SELECT * FROM src WHERE key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table8 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job upto 7 tables +EXPLAIN +SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) +FROM test_table1 a JOIN test_table2 b ON a.key = b.key +JOIN test_table3 c ON a.key = c.key +JOIN test_table4 d ON a.key = d.key +JOIN test_table5 e ON a.key = e.key +JOIN test_table6 f ON a.key = f.key +JOIN test_table7 g ON a.key = g.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job upto 7 tables +EXPLAIN +SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) +FROM test_table1 a JOIN test_table2 b ON a.key = b.key +JOIN test_table3 c ON a.key = c.key +JOIN test_table4 d ON a.key = d.key +JOIN test_table5 e ON a.key = e.key +JOIN test_table6 f ON a.key = f.key +JOIN test_table7 g ON a.key = g.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME test_table3) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME test_table4) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key))) (TOK_TABREF (TOK_TABNAME test_table5) e) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL e) key))) (TOK_TABREF (TOK_TABNAME test_table6) f) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL f) key))) (TOK_TABREF (TOK_TABNAME test_table7) g) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL g) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b c d e f g))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + Inner Join 0 to 4 + Inner Join 0 to 5 + Inner Join 0 to 6 + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + 3 [Column[key]] + 4 [Column[key]] + 5 [Column[key]] + 6 [Column[key]] + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) +FROM test_table1 a JOIN test_table2 b ON a.key = b.key +JOIN test_table3 c ON a.key = c.key +JOIN test_table4 d ON a.key = d.key +JOIN test_table5 e ON a.key = e.key +JOIN test_table6 f ON a.key = f.key +JOIN test_table7 g ON a.key = g.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table4 +PREHOOK: Input: default@test_table5 +PREHOOK: Input: default@test_table6 +PREHOOK: Input: default@test_table7 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) +FROM test_table1 a JOIN test_table2 b ON a.key = b.key +JOIN test_table3 c ON a.key = c.key +JOIN test_table4 d ON a.key = d.key +JOIN test_table5 e ON a.key = e.key +JOIN test_table6 f ON a.key = f.key +JOIN test_table7 g ON a.key = g.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table4 +POSTHOOK: Input: default@test_table5 +POSTHOOK: Input: default@test_table6 +POSTHOOK: Input: default@test_table7 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +4378 +PREHOOK: query: -- It should be automatically converted to a sort-merge join followed by a groupby in +-- a single MR job +EXPLAIN +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +PREHOOK: type: QUERY +POSTHOOK: query: -- It should be automatically converted to a sort-merge join followed by a groupby in +-- a single MR job +EXPLAIN +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME test_table3) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME test_table4) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key))) (TOK_TABREF (TOK_TABNAME test_table5) e) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL e) key))) (TOK_TABREF (TOK_TABNAME test_table6) f) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL f) key))) (TOK_TABREF (TOK_TABNAME test_table7) g) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL g) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Sorted Merge Bucket Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + Left Outer Join0 to 5 + condition expressions: + 0 {key} + 1 + 2 + 3 + 4 + 5 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + 3 [Column[key]] + 4 [Column[key]] + 5 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 0 + g + TableScan + alias: g + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table4 +PREHOOK: Input: default@test_table5 +PREHOOK: Input: default@test_table6 +PREHOOK: Input: default@test_table7 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table4 +POSTHOOK: Input: default@test_table5 +POSTHOOK: Input: default@test_table6 +POSTHOOK: Input: default@test_table7 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +4378 +PREHOOK: query: EXPLAIN +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF (TOK_TABNAME test_table3) c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key))) (TOK_TABREF (TOK_TABNAME test_table4) d) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL d) key))) (TOK_TABREF (TOK_TABNAME test_table5) e) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL e) key))) (TOK_TABREF (TOK_TABNAME test_table6) f) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL f) key))) (TOK_TABREF (TOK_TABNAME test_table7) g) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL g) key))) (TOK_TABREF (TOK_TABNAME test_table8) h) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL h) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-12 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-12 + Stage-3 depends on stages: Stage-11 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Sorted Merge Bucket Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + Left Outer Join0 to 5 + condition expressions: + 0 {key} + 1 + 2 + 3 + 4 + 5 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + 3 [Column[key]] + 4 [Column[key]] + 5 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 0 + value expressions: + expr: _col0 + type: int + g + TableScan + alias: g + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col20} + 1 + handleSkewJoin: false + outputColumnNames: _col20 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + h + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + h + TableScan + alias: h + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col20]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-11 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col20]] + 1 [Column[key]] + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table4 +PREHOOK: Input: default@test_table5 +PREHOOK: Input: default@test_table6 +PREHOOK: Input: default@test_table7 +PREHOOK: Input: default@test_table8 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table4 +POSTHOOK: Input: default@test_table5 +POSTHOOK: Input: default@test_table6 +POSTHOOK: Input: default@test_table7 +POSTHOOK: Input: default@test_table8 +#### A masked pattern was here #### +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table5.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table6.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table7.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table8.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +13126 Index: ql/src/test/queries/clientpositive/smb_mapjoin_17.q =================================================================== --- ql/src/test/queries/clientpositive/smb_mapjoin_17.q (revision 0) +++ ql/src/test/queries/clientpositive/smb_mapjoin_17.q (working copy) @@ -0,0 +1,101 @@ +set hive.optimize.bucketmapjoin = true; +set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.enforce.bucketing=true; +set hive.enforce.sorting=true; +set hive.exec.reducers.max = 1; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table5 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table6 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table7 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; +CREATE TABLE test_table8 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; + +INSERT OVERWRITE TABLE test_table1 +SELECT * FROM src WHERE key < 10; + +INSERT OVERWRITE TABLE test_table2 +SELECT * FROM src WHERE key < 10; + +INSERT OVERWRITE TABLE test_table3 +SELECT * FROM src WHERE key < 10; + +INSERT OVERWRITE TABLE test_table4 +SELECT * FROM src WHERE key < 10; + +INSERT OVERWRITE TABLE test_table5 +SELECT * FROM src WHERE key < 10; + +INSERT OVERWRITE TABLE test_table6 +SELECT * FROM src WHERE key < 10; + +INSERT OVERWRITE TABLE test_table7 +SELECT * FROM src WHERE key < 10; + +INSERT OVERWRITE TABLE test_table8 +SELECT * FROM src WHERE key < 10; + +-- Mapjoin followed by a aggregation should be performed in a single MR job upto 7 tables +EXPLAIN +SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) +FROM test_table1 a JOIN test_table2 b ON a.key = b.key +JOIN test_table3 c ON a.key = c.key +JOIN test_table4 d ON a.key = d.key +JOIN test_table5 e ON a.key = e.key +JOIN test_table6 f ON a.key = f.key +JOIN test_table7 g ON a.key = g.key; + +SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*) +FROM test_table1 a JOIN test_table2 b ON a.key = b.key +JOIN test_table3 c ON a.key = c.key +JOIN test_table4 d ON a.key = d.key +JOIN test_table5 e ON a.key = e.key +JOIN test_table6 f ON a.key = f.key +JOIN test_table7 g ON a.key = g.key; + +set hive.auto.convert.join=true; +set hive.auto.convert.sortmerge.join=true; + +-- It should be automatically converted to a sort-merge join followed by a groupby in +-- a single MR job +EXPLAIN +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key; + +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key; + +EXPLAIN +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key; + +SELECT count(*) +FROM test_table1 a LEFT OUTER JOIN test_table2 b ON a.key = b.key +LEFT OUTER JOIN test_table3 c ON a.key = c.key +LEFT OUTER JOIN test_table4 d ON a.key = d.key +LEFT OUTER JOIN test_table5 e ON a.key = e.key +LEFT OUTER JOIN test_table6 f ON a.key = f.key +LEFT OUTER JOIN test_table7 g ON a.key = g.key +LEFT OUTER JOIN test_table8 h ON a.key = h.key; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1459154) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -6580,7 +6580,7 @@ } if (!node.getNoOuterJoin() || !target.getNoOuterJoin()) { // todo 8 way could be not enough number - if (node.getRightAliases().length + node.getRightAliases().length + 1 >= 8) { + if (node.getLeftAliases().length + node.getRightAliases().length + 1 >= 8) { LOG.info(ErrorMsg.JOINNODE_OUTERJOIN_MORETHAN_8); return false; }