Index: data/files/fact-data.txt
===================================================================
--- data/files/fact-data.txt	(revision 0)
+++ data/files/fact-data.txt	(revision 0)
@@ -0,0 +1,10 @@
+1212
+111212
+212212
+313212
+414212
+515234
+616234
+717234
+818234
+919234
Index: data/files/dim-data.txt
===================================================================
--- data/files/dim-data.txt	(revision 0)
+++ data/files/dim-data.txt	(revision 0)
@@ -0,0 +1,4 @@
+11
+22
+33
+44
Index: ql/src/test/results/clientpositive/join_star.q.out
===================================================================
--- ql/src/test/results/clientpositive/join_star.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/join_star.q.out	(revision 0)
@@ -0,0 +1,638 @@
+PREHOOK: query: create table fact(m1 int, m2 int, d1 int, d2 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table fact(m1 int, m2 int, d1 int, d2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@fact
+PREHOOK: query: create table dim1(f1 int, f2 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dim1(f1 int, f2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dim1
+PREHOOK: query: create table dim2(f3 int, f4 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dim2(f3 int, f4 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dim2
+PREHOOK: query: create table dim3(f5 int, f6 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dim3(f5 int, f6 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dim3
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact
+PREHOOK: type: LOAD
+PREHOOK: Output: default@fact
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@fact
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dim1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dim1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dim2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dim2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dim3
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dim3
+PREHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)))))
+
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1
+  Stage-6 has a backup stage: Stage-1
+  Stage-3 depends on stages: Stage-6
+  Stage-7 has a backup stage: Stage-1
+  Stage-4 depends on stages: Stage-7
+  Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-5
+    Conditional Operator
+
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        dim1
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            HashTable Sink Operator
+              condition expressions:
+                0 {m1} {m2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              Position of Big Table: 0
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        fact
+          TableScan
+            alias: fact
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {m1} {m2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              outputColumnNames: _col0, _col1, _col7
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: int
+                      expr: _col7
+                      type: int
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-7
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        fact
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        fact
+          TableScan
+            alias: fact
+            HashTable Sink Operator
+              condition expressions:
+                0 {m1} {m2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              Position of Big Table: 1
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {m1} {m2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              outputColumnNames: _col0, _col1, _col7
+              Position of Big Table: 1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: int
+                      expr: _col7
+                      type: int
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            Reduce Output Operator
+              key expressions:
+                    expr: f1
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: f1
+                    type: int
+              tag: 1
+              value expressions:
+                    expr: f2
+                    type: int
+        fact
+          TableScan
+            alias: fact
+            Reduce Output Operator
+              key expressions:
+                    expr: d1
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: d1
+                    type: int
+              tag: 0
+              value expressions:
+                    expr: m1
+                    type: int
+                    expr: m2
+                    type: int
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col7
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: int
+                  expr: _col7
+                  type: int
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dim1
+PREHOOK: Input: default@fact
+#### A masked pattern was here ####
+POSTHOOK: query: select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dim1
+POSTHOOK: Input: default@fact
+#### A masked pattern was here ####
+1	2	1
+11	12	1
+21	22	1
+31	32	1
+41	42	1
+51	52	3
+61	62	3
+71	72	3
+81	82	3
+91	92	3
+PREHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL fact) d2) (. (TOK_TABLE_OR_COL dim2) f3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4)))))
+
+STAGE DEPENDENCIES:
+  Stage-9 is a root stage , consists of Stage-12, Stage-13, Stage-2
+  Stage-12 has a backup stage: Stage-2
+  Stage-7 depends on stages: Stage-12
+  Stage-6 depends on stages: Stage-2, Stage-7, Stage-8 , consists of Stage-10, Stage-11, Stage-1
+  Stage-10 has a backup stage: Stage-1
+  Stage-4 depends on stages: Stage-10
+  Stage-11 has a backup stage: Stage-1
+  Stage-5 depends on stages: Stage-11
+  Stage-1
+  Stage-13 has a backup stage: Stage-2
+  Stage-8 depends on stages: Stage-13
+  Stage-2
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-9
+    Conditional Operator
+
+  Stage: Stage-12
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        dim1
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            HashTable Sink Operator
+              condition expressions:
+                0 {m1} {m2} {d2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              Position of Big Table: 0
+
+  Stage: Stage-7
+    Map Reduce
+      Alias -> Map Operator Tree:
+        fact
+          TableScan
+            alias: fact
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {m1} {m2} {d2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              outputColumnNames: _col0, _col1, _col3, _col7
+              Position of Big Table: 0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-10
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        dim2
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        dim2
+          TableScan
+            alias: dim2
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col7} {_col0} {_col1}
+                1 {f4}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col3]]
+                1 [Column[f3]]
+              Position of Big Table: 0
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME
+          Map Join Operator
+            condition map:
+                 Inner Join 0 to 1
+            condition expressions:
+              0 {_col7} {_col0} {_col1}
+              1 {f4}
+            handleSkewJoin: false
+            keys:
+              0 [Column[_col3]]
+              1 [Column[f3]]
+            outputColumnNames: _col1, _col4, _col5, _col11
+            Position of Big Table: 0
+            Select Operator
+              expressions:
+                    expr: _col4
+                    type: int
+                    expr: _col5
+                    type: int
+                    expr: _col1
+                    type: int
+                    expr: _col11
+                    type: int
+              outputColumnNames: _col0, _col1, _col2, _col3
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-11
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME
+          HashTable Sink Operator
+            condition expressions:
+              0 {_col7} {_col0} {_col1}
+              1 {f4}
+            handleSkewJoin: false
+            keys:
+              0 [Column[_col3]]
+              1 [Column[f3]]
+            Position of Big Table: 1
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dim2
+          TableScan
+            alias: dim2
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {_col7} {_col0} {_col1}
+                1 {f4}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col3]]
+                1 [Column[f3]]
+              outputColumnNames: _col1, _col4, _col5, _col11
+              Position of Big Table: 1
+              Select Operator
+                expressions:
+                      expr: _col4
+                      type: int
+                      expr: _col5
+                      type: int
+                      expr: _col1
+                      type: int
+                      expr: _col11
+                      type: int
+                outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME
+          Reduce Output Operator
+            key expressions:
+                  expr: _col3
+                  type: int
+            sort order: +
+            Map-reduce partition columns:
+                  expr: _col3
+                  type: int
+            tag: 0
+            value expressions:
+                  expr: _col7
+                  type: int
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: int
+        dim2
+          TableScan
+            alias: dim2
+            Reduce Output Operator
+              key expressions:
+                    expr: f3
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: f3
+                    type: int
+              tag: 1
+              value expressions:
+                    expr: f4
+                    type: int
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col1} {VALUE._col4} {VALUE._col5}
+            1 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col1, _col4, _col5, _col11
+          Select Operator
+            expressions:
+                  expr: _col4
+                  type: int
+                  expr: _col5
+                  type: int
+                  expr: _col1
+                  type: int
+                  expr: _col11
+                  type: int
+            outputColumnNames: _col0, _col1, _col2, _col3
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-13
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        fact
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        fact
+          TableScan
+            alias: fact
+            HashTable Sink Operator
+              condition expressions:
+                0 {m1} {m2} {d2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              Position of Big Table: 1
+
+  Stage: Stage-8
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {m1} {m2} {d2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              outputColumnNames: _col0, _col1, _col3, _col7
+              Position of Big Table: 1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            Reduce Output Operator
+              key expressions:
+                    expr: f1
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: f1
+                    type: int
+              tag: 1
+              value expressions:
+                    expr: f2
+                    type: int
+        fact
+          TableScan
+            alias: fact
+            Reduce Output Operator
+              key expressions:
+                    expr: d1
+                    type: int
+              sort order: +
+              Map-reduce partition columns:
+                    expr: d1
+                    type: int
+              tag: 0
+              value expressions:
+                    expr: m1
+                    type: int
+                    expr: m2
+                    type: int
+                    expr: d2
+                    type: int
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1} {VALUE._col3}
+            1 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col3, _col7
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dim1
+PREHOOK: Input: default@dim2
+PREHOOK: Input: default@fact
+#### A masked pattern was here ####
+POSTHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dim1
+POSTHOOK: Input: default@dim2
+POSTHOOK: Input: default@fact
+#### A masked pattern was here ####
+1	2	1	2
+11	12	1	2
+21	22	1	2
+31	32	1	2
+41	42	1	2
+51	52	3	4
+61	62	3	4
+71	72	3	4
+81	82	3	4
+91	92	3	4
Index: ql/src/test/queries/clientpositive/join_star.q
===================================================================
--- ql/src/test/queries/clientpositive/join_star.q	(revision 0)
+++ ql/src/test/queries/clientpositive/join_star.q	(revision 0)
@@ -0,0 +1,20 @@
+create table fact(m1 int, m2 int, d1 int, d2 int);
+create table dim1(f1 int, f2 int);
+create table dim2(f3 int, f4 int);
+create table dim3(f5 int, f6 int);
+
+LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact;
+LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1;
+LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2;
+LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=5;
+
+explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1;
+select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1;
+
+explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3;
+select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3;
+
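
Note on the patch (reviewer sketch, not part of the diff): the three `set` commands in
join_star.q are what drive the plans recorded in join_star.q.out. With
hive.auto.convert.join=true the optimizer rewrites each reduce-side join into a map join
that loads the dimension table into an in-memory hash table, and the explain output
accordingly shows Conditional Operator stages whose map-join variants carry a
reduce-side backup stage (e.g. Stage-6 backed by Stage-1). Note also that dim3 is
created and loaded but never referenced by either test query. A hypothetical third
query extending the same star pattern could look like the sketch below; the dim3
join key (fact.d1 = dim3.f5) is an assumption chosen for illustration only:

-- Hypothetical extension, not in the patch: a third dimension leg on the star.
explain select m1, m2, f2, f4, f6
from fact
join dim1 on fact.d1 = dim1.f1
join dim2 on fact.d2 = dim2.f3
join dim3 on fact.d1 = dim3.f5;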