Index: data/files/fact-data.txt =================================================================== --- data/files/fact-data.txt (revision 0) +++ data/files/fact-data.txt (revision 0) @@ -0,0 +1,10 @@ +1212 +111212 +212212 +313212 +414212 +515234 +616234 +717234 +818234 +919234 Index: data/files/dim-data.txt =================================================================== --- data/files/dim-data.txt (revision 0) +++ data/files/dim-data.txt (revision 0) @@ -0,0 +1,4 @@ +11 +22 +33 +44 Index: ql/src/test/results/clientpositive/join_star.q.out =================================================================== --- ql/src/test/results/clientpositive/join_star.q.out (revision 0) +++ ql/src/test/results/clientpositive/join_star.q.out (revision 0) @@ -0,0 +1,414 @@ +PREHOOK: query: create table fact(m1 int, m2 int, d1 int, d2 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table fact(m1 int, m2 int, d1 int, d2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@fact +PREHOOK: query: create table dim1(f1 int, f2 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dim1(f1 int, f2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dim1 +PREHOOK: query: create table dim2(f3 int, f4 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dim2(f3 int, f4 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dim2 +PREHOOK: query: create table dim3(f5 int, f6 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dim3(f5 int, f6 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dim3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact +PREHOOK: type: LOAD +PREHOOK: Output: default@fact +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact +POSTHOOK: type: LOAD +POSTHOOK: Output: default@fact +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1 +PREHOOK: type: LOAD +PREHOOK: Output: default@dim1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dim1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2 +PREHOOK: type: LOAD +PREHOOK: Output: default@dim2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dim2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3 +PREHOOK: type: LOAD +PREHOOK: Output: default@dim3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dim3 +PREHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2))))) + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + dim1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + dim1 + TableScan + alias: dim1 + HashTable Sink Operator + condition expressions: + 0 {m1} {m2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + Position of Big Table: 0 + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + fact + TableScan + alias: fact + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {m1} {m2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + outputColumnNames: _col0, _col1, _col7 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col7 + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 +11 12 1 +21 22 1 +31 32 1 +41 42 1 +51 52 3 +61 62 3 +71 72 3 +81 82 3 +91 92 3 +PREHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL fact) d2) (. (TOK_TABLE_OR_COL dim2) f3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4))))) + +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + dim1 + Fetch Operator + limit: -1 + dim2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + dim1 + TableScan + alias: dim1 + HashTable Sink Operator + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + Position of Big Table: 0 + dim2 + TableScan + alias: dim2 + HashTable Sink Operator + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + handleSkewJoin: false + keys: + 0 [Column[_col3]] + 1 [Column[f3]] + Position of Big Table: 0 + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + fact + TableScan + alias: fact + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + outputColumnNames: _col0, _col1, _col3, _col7 + Position of Big Table: 0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + handleSkewJoin: false + keys: + 0 [Column[_col3]] + 1 [Column[f3]] + outputColumnNames: _col1, _col4, _col5, _col11 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col4 + type: int + expr: _col5 + type: int + expr: _col1 + type: int + expr: _col11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@dim2 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@dim2 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 2 +11 12 1 2 +21 22 1 2 +31 32 1 2 +41 42 1 2 +51 52 3 4 +61 62 3 4 +71 72 3 4 +81 82 3 4 +91 92 3 4 +PREHOOK: query: explain select m1, m2, f2, f4, f6 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 join dim3 on dim2.f3=dim3.f5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2, f4, f6 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 join dim3 on dim2.f3=dim3.f5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL fact) d2) (. (TOK_TABLE_OR_COL dim2) f3))) (TOK_TABREF (TOK_TABNAME dim3)) (= (. (TOK_TABLE_OR_COL dim2) f3) (. (TOK_TABLE_OR_COL dim3) f5)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4)) (TOK_SELEXPR (TOK_TABLE_OR_COL f6))))) + +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + dim1 + Fetch Operator + limit: -1 + dim2 + Fetch Operator + limit: -1 + dim3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + dim1 + TableScan + alias: dim1 + HashTable Sink Operator + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + Position of Big Table: 0 + dim2 + TableScan + alias: dim2 + HashTable Sink Operator + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + 2 {f6} + handleSkewJoin: false + keys: + 0 [Column[_col3]] + 1 [Column[f3]] + 2 [Column[f5]] + Position of Big Table: 0 + dim3 + TableScan + alias: dim3 + HashTable Sink Operator + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + 2 {f6} + handleSkewJoin: false + keys: + 0 [Column[_col3]] + 1 [Column[f3]] + 2 [Column[f5]] + Position of Big Table: 0 + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + fact + TableScan + alias: fact + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + outputColumnNames: _col0, _col1, _col3, _col7 + Position of Big Table: 0 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + 2 {f6} + handleSkewJoin: false + keys: + 0 [Column[_col3]] + 1 [Column[f3]] + 2 [Column[f5]] + outputColumnNames: _col1, _col4, _col5, _col11, _col15 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col4 + type: int + expr: _col5 + type: int + expr: _col1 + type: int + expr: _col11 + type: int + expr: _col15 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select m1, m2, f2, f4, f6 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 join dim3 on dim2.f3=dim3.f5 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@dim2 +PREHOOK: Input: default@dim3 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2, f4, f6 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 join dim3 on dim2.f3=dim3.f5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@dim2 +POSTHOOK: Input: default@dim3 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 2 2 +11 12 1 2 2 +21 22 1 2 2 +31 32 1 2 2 +41 42 1 2 2 +51 52 3 4 4 +61 62 3 4 4 +71 72 3 4 4 +81 82 3 4 4 +91 92 3 4 4 Index: ql/src/test/queries/clientpositive/join_star.q =================================================================== --- ql/src/test/queries/clientpositive/join_star.q (revision 0) +++ ql/src/test/queries/clientpositive/join_star.q (revision 0) @@ -0,0 +1,25 @@ +create table fact(m1 int, m2 int, d1 int, d2 int); +create table dim1(f1 int, f2 int); +create table dim2(f3 int, f4 int); +create table dim3(f5 int, f6 int); + +LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact; +LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1; +LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2; +LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=500; + +explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1; + +select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1; + +explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3; + +select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3; + +explain select m1, m2, f2, f4, f6 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 join dim3 on dim2.f3=dim3.f5; + +select m1, m2, f2, f4, f6 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 join dim3 on dim2.f3=dim3.f5;