Index: data/files/fact-data.txt =================================================================== --- data/files/fact-data.txt (revision 0) +++ data/files/fact-data.txt (revision 0) @@ -0,0 +1,10 @@ +1212 +111212 +212212 +313212 +414212 +515234 +616234 +717234 +818234 +919234 Index: data/files/dim-data.txt =================================================================== --- data/files/dim-data.txt (revision 0) +++ data/files/dim-data.txt (revision 0) @@ -0,0 +1,4 @@ +11 +22 +33 +44 Index: ql/src/test/results/clientpositive/join_star.q.out =================================================================== --- ql/src/test/results/clientpositive/join_star.q.out (revision 0) +++ ql/src/test/results/clientpositive/join_star.q.out (revision 0) @@ -0,0 +1,864 @@ +PREHOOK: query: create table fact(m1 int, m2 int, d1 int, d2 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table fact(m1 int, m2 int, d1 int, d2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@fact +PREHOOK: query: create table dim1(f1 int, f2 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dim1(f1 int, f2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dim1 +PREHOOK: query: create table dim2(f3 int, f4 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dim2(f3 int, f4 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dim2 +PREHOOK: query: create table dim3(f5 int, f6 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dim3(f5 int, f6 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dim3 +PREHOOK: query: create table dim4(f7 int, f8 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dim4(f7 int, f8 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dim4 +PREHOOK: query: create table dim5(f9 int, f10 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dim5(f9 int, f10 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dim5 +PREHOOK: query: create table dim6(f11 int, f12 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dim6(f11 int, f12 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dim6 +PREHOOK: query: create table dim7(f13 int, f14 int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table dim7(f13 int, f14 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dim7 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact +PREHOOK: type: LOAD +PREHOOK: Output: default@fact +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact +POSTHOOK: type: LOAD +POSTHOOK: Output: default@fact +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1 +PREHOOK: type: LOAD +PREHOOK: Output: default@dim1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dim1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2 +PREHOOK: type: LOAD +PREHOOK: Output: default@dim2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dim2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3 +PREHOOK: type: LOAD +PREHOOK: Output: default@dim3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dim3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim4 +PREHOOK: 
type: LOAD +PREHOOK: Output: default@dim4 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim4 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dim4 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim5 +PREHOOK: type: LOAD +PREHOOK: Output: default@dim5 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim5 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dim5 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim6 +PREHOOK: type: LOAD +PREHOOK: Output: default@dim6 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim6 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dim6 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim7 +PREHOOK: type: LOAD +PREHOOK: Output: default@dim7 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim7 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dim7 +PREHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2))))) + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + dim1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + dim1 + TableScan + alias: dim1 + HashTable Sink Operator + condition expressions: + 0 {m1} {m2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + Position of Big Table: 0 + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + fact + TableScan + alias: fact + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {m1} {m2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + outputColumnNames: _col0, _col1, _col7 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + expr: _col7 + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 +11 12 1 +21 22 1 +31 32 1 +41 42 1 +51 52 3 +61 62 3 +71 72 3 +81 82 3 +91 92 3 +PREHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on 
fact.d2=dim2.f3 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL fact) d2) (. (TOK_TABLE_OR_COL dim2) f3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4))))) + +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + dim1 + Fetch Operator + limit: -1 + dim2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + dim1 + TableScan + alias: dim1 + HashTable Sink Operator + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + Position of Big Table: 0 + dim2 + TableScan + alias: dim2 + HashTable Sink Operator + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + handleSkewJoin: false + keys: + 0 [Column[_col3]] + 1 [Column[f3]] + Position of Big Table: 0 + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + fact + TableScan + alias: fact + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + outputColumnNames: _col0, _col1, _col3, _col7 + Position of Big Table: 0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + handleSkewJoin: false + keys: + 0 [Column[_col3]] + 1 [Column[f3]] + outputColumnNames: _col1, _col4, _col5, _col11 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col4 + type: int + expr: _col5 + type: int + expr: _col1 + type: int + expr: _col11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@dim2 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@dim2 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 2 +11 12 1 2 +21 22 1 2 +31 32 1 2 +41 42 1 2 +51 52 3 4 +61 62 3 4 +71 72 3 4 +81 82 3 4 +91 92 3 4 +PREHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL dim1) f2) (. 
(TOK_TABLE_OR_COL dim2) f3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4))))) + +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + dim1 + Fetch Operator + limit: -1 + dim2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + dim1 + TableScan + alias: dim1 + HashTable Sink Operator + condition expressions: + 0 {m1} {m2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + Position of Big Table: 0 + dim2 + TableScan + alias: dim2 + HashTable Sink Operator + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + handleSkewJoin: false + keys: + 0 [Column[_col7]] + 1 [Column[f3]] + Position of Big Table: 0 + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + fact + TableScan + alias: fact + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {m1} {m2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + outputColumnNames: _col0, _col1, _col7 + Position of Big Table: 0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + handleSkewJoin: false + keys: + 0 [Column[_col7]] + 1 [Column[f3]] + outputColumnNames: _col1, _col4, _col5, _col11 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col4 + type: int + expr: _col5 + type: int + expr: _col1 + type: int + expr: _col11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@dim2 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@dim2 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 1 +11 12 1 1 +21 22 1 1 +31 32 1 1 +41 42 1 1 +51 52 3 3 +61 62 3 3 +71 72 3 3 +81 82 3 3 +91 92 3 3 +PREHOOK: query: explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL dim1) f2) (. 
(TOK_TABLE_OR_COL dim2) f3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4))))) + +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + dim1 + Fetch Operator + limit: -1 + dim2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + dim1 + TableScan + alias: dim1 + HashTable Sink Operator + condition expressions: + 0 {m1} {m2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + Position of Big Table: 0 + dim2 + TableScan + alias: dim2 + HashTable Sink Operator + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + handleSkewJoin: false + keys: + 0 [Column[_col7]] + 1 [Column[f3]] + Position of Big Table: 0 + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + fact + TableScan + alias: fact + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {m1} {m2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + outputColumnNames: _col0, _col1, _col7 + Position of Big Table: 0 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col7} {_col0} {_col1} + 1 {f4} + handleSkewJoin: false + keys: + 0 [Column[_col7]] + 1 [Column[f3]] + outputColumnNames: _col1, _col4, _col5, _col11 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col4 + type: int + expr: _col5 + type: int + expr: _col1 + type: int + expr: _col11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@dim2 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@dim2 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 1 +11 12 1 1 +21 22 1 1 +31 32 1 1 +41 42 1 1 +51 52 3 3 +61 62 3 3 +71 72 3 3 +81 82 3 3 +91 92 3 3 +PREHOOK: query: explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14 + from fact + Left outer join dim1 on fact.d1= dim1.f1 + Left outer join dim2 on dim1.f2 = dim2.f3 + Left outer Join dim3 on fact.d2= dim3.f5 + Left outer Join dim4 on dim3.f6= dim4.f7 + Left outer join dim5 on dim4.f8= dim5.f9 + Left outer Join dim6 on dim3.f6= dim6.f11 + Left outer Join dim7 on dim6.f12 = dim7.f13 +PREHOOK: type: QUERY +POSTHOOK: query: explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14 + from fact + Left outer join dim1 on fact.d1= dim1.f1 + Left outer join dim2 on dim1.f2 = dim2.f3 + Left outer Join dim3 on fact.d2= dim3.f5 + Left outer Join dim4 on dim3.f6= dim4.f7 + Left outer join dim5 on dim4.f8= dim5.f9 + Left outer Join dim6 on dim3.f6= dim6.f11 + Left outer Join dim7 on dim6.f12 = dim7.f13 +POSTHOOK: type: QUERY 
+ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL dim1) f2) (. (TOK_TABLE_OR_COL dim2) f3))) (TOK_TABREF (TOK_TABNAME dim3)) (= (. (TOK_TABLE_OR_COL fact) d2) (. (TOK_TABLE_OR_COL dim3) f5))) (TOK_TABREF (TOK_TABNAME dim4)) (= (. (TOK_TABLE_OR_COL dim3) f6) (. (TOK_TABLE_OR_COL dim4) f7))) (TOK_TABREF (TOK_TABNAME dim5)) (= (. (TOK_TABLE_OR_COL dim4) f8) (. (TOK_TABLE_OR_COL dim5) f9))) (TOK_TABREF (TOK_TABNAME dim6)) (= (. (TOK_TABLE_OR_COL dim3) f6) (. (TOK_TABLE_OR_COL dim6) f11))) (TOK_TABREF (TOK_TABNAME dim7)) (= (. (TOK_TABLE_OR_COL dim6) f12) (. (TOK_TABLE_OR_COL dim7) f13)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4)) (TOK_SELEXPR (TOK_TABLE_OR_COL f6)) (TOK_SELEXPR (TOK_TABLE_OR_COL f8)) (TOK_SELEXPR (TOK_TABLE_OR_COL f10)) (TOK_SELEXPR (TOK_TABLE_OR_COL f12)) (TOK_SELEXPR (TOK_TABLE_OR_COL f14))))) + +STAGE DEPENDENCIES: + Stage-18 is a root stage + Stage-17 depends on stages: Stage-18 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-18 + Map Reduce Local Work + Alias -> Map Local Tables: + dim1 + Fetch Operator + limit: -1 + dim2 + Fetch Operator + limit: -1 + dim3 + Fetch Operator + limit: -1 + dim4 + Fetch Operator + limit: -1 + dim5 + Fetch Operator + limit: -1 + dim6 + Fetch Operator + limit: -1 + dim7 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + dim1 + TableScan + alias: dim1 + HashTable Sink Operator + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + Position of Big Table: 0 + dim2 + TableScan + alias: dim2 + HashTable Sink Operator + condition expressions: + 0 {_col7} {_col0} {_col1} {_col3} + 1 {f4} + handleSkewJoin: false + keys: + 0 [Column[_col7]] + 1 [Column[f3]] + Position of Big Table: 0 + dim3 + TableScan + alias: dim3 + HashTable Sink Operator + condition expressions: + 0 {_col11} {_col1} {_col4} {_col5} + 1 {f6} + handleSkewJoin: false + keys: + 0 [Column[_col7]] + 1 [Column[f5]] + Position of Big Table: 0 + dim4 + TableScan + alias: dim4 + HashTable Sink Operator + condition expressions: + 0 {_col15} {_col1} {_col5} {_col8} {_col9} + 1 {f8} + 2 {f12} + handleSkewJoin: false + keys: + 0 [Column[_col15]] + 1 [Column[f7]] + 2 [Column[f11]] + Position of Big Table: 0 + dim5 + TableScan + alias: dim5 + HashTable Sink Operator + condition expressions: + 0 {_col1} {_col5} {_col9} {_col12} {_col13} {_col23} {_col19} + 1 {f10} + handleSkewJoin: false + keys: + 0 [Column[_col19]] + 1 [Column[f9]] + Position of Big Table: 0 + dim6 + TableScan + alias: dim6 + HashTable Sink Operator + condition expressions: + 0 {_col15} {_col1} {_col5} {_col8} {_col9} + 1 {f8} + 2 {f12} + handleSkewJoin: false + keys: + 0 [Column[_col15]] + 1 [Column[f7]] + 2 [Column[f11]] + Position of Big Table: 0 + dim7 + TableScan + alias: dim7 + HashTable Sink Operator + condition expressions: + 0 {_col1} {_col5} {_col9} {_col12} {_col13} {_col19} {_col27} {_col23} + 1 {f14} + handleSkewJoin: false + keys: + 0 [Column[_col19]] + 1 [Column[f13]] + Position of Big Table: 0 + + Stage: Stage-17 + Map Reduce + Alias -> Map Operator 
Tree: + fact + TableScan + alias: fact + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {m1} {m2} {d2} + 1 {f2} + handleSkewJoin: false + keys: + 0 [Column[d1]] + 1 [Column[f1]] + outputColumnNames: _col0, _col1, _col3, _col7 + Position of Big Table: 0 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col7} {_col0} {_col1} {_col3} + 1 {f4} + handleSkewJoin: false + keys: + 0 [Column[_col7]] + 1 [Column[f3]] + outputColumnNames: _col1, _col4, _col5, _col7, _col11 + Position of Big Table: 0 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col11} {_col1} {_col4} {_col5} + 1 {f6} + handleSkewJoin: false + keys: + 0 [Column[_col7]] + 1 [Column[f5]] + outputColumnNames: _col1, _col5, _col8, _col9, _col15 + Position of Big Table: 0 + Map Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {_col15} {_col1} {_col5} {_col8} {_col9} + 1 {f8} + 2 {f12} + handleSkewJoin: false + keys: + 0 [Column[_col15]] + 1 [Column[f7]] + 2 [Column[f11]] + outputColumnNames: _col1, _col5, _col9, _col12, _col13, _col19, _col23 + Position of Big Table: 0 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col1} {_col5} {_col9} {_col12} {_col13} {_col23} {_col19} + 1 {f10} + handleSkewJoin: false + keys: + 0 [Column[_col19]] + 1 [Column[f9]] + outputColumnNames: _col1, _col5, _col9, _col12, _col13, _col19, _col23, _col27 + Position of Big Table: 0 + Map Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {_col1} {_col5} {_col9} {_col12} {_col13} {_col19} {_col27} {_col23} + 1 {f14} + handleSkewJoin: false + keys: + 0 [Column[_col19]] + 1 [Column[f13]] + outputColumnNames: _col1, _col5, _col9, _col12, _col13, _col19, _col23, _col27, _col31 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col12 + type: int + expr: _col13 + type: int + expr: _col9 + type: int + expr: _col5 + type: int + expr: _col1 + type: int + expr: _col27 + type: int + expr: _col23 + type: int + expr: _col19 + type: int + expr: _col31 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: Select m1, m2, f2, f4, f6, f8, f10, f12, f14 + from fact + Left outer join dim1 on fact.d1= dim1.f1 + Left outer join dim2 on dim1.f2 = dim2.f3 + Left outer Join dim3 on fact.d2= dim3.f5 + Left outer Join dim4 on dim3.f6= dim4.f7 + Left outer join dim5 on dim4.f8= dim5.f9 + Left outer Join dim6 on dim3.f6= dim6.f11 + Left outer Join dim7 on dim6.f12 = dim7.f13 +PREHOOK: type: QUERY +PREHOOK: Input: default@dim1 +PREHOOK: Input: default@dim2 +PREHOOK: Input: default@dim3 +PREHOOK: Input: default@dim4 +PREHOOK: Input: default@dim5 +PREHOOK: Input: default@dim6 +PREHOOK: Input: default@dim7 +PREHOOK: Input: default@fact +#### A masked pattern was here #### +POSTHOOK: query: Select m1, m2, f2, f4, f6, f8, f10, f12, f14 + from fact + Left outer join dim1 on fact.d1= dim1.f1 + Left outer join dim2 on dim1.f2 = dim2.f3 + Left outer Join dim3 on fact.d2= dim3.f5 + Left outer Join dim4 on dim3.f6= dim4.f7 + Left outer join dim5 on dim4.f8= dim5.f9 + Left outer Join 
dim6 on dim3.f6= dim6.f11 + Left outer Join dim7 on dim6.f12 = dim7.f13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim1 +POSTHOOK: Input: default@dim2 +POSTHOOK: Input: default@dim3 +POSTHOOK: Input: default@dim4 +POSTHOOK: Input: default@dim5 +POSTHOOK: Input: default@dim6 +POSTHOOK: Input: default@dim7 +POSTHOOK: Input: default@fact +#### A masked pattern was here #### +1 2 1 1 2 2 2 2 2 +11 12 1 1 2 2 2 2 2 +21 22 1 1 2 2 2 2 2 +31 32 1 1 2 2 2 2 2 +41 42 1 1 2 2 2 2 2 +51 52 3 3 4 4 4 4 4 +61 62 3 3 4 4 4 4 4 +71 72 3 3 4 4 4 4 4 +81 82 3 3 4 4 4 4 4 +91 92 3 3 4 4 4 4 4 Index: ql/src/test/queries/clientpositive/join_star.q =================================================================== --- ql/src/test/queries/clientpositive/join_star.q (revision 0) +++ ql/src/test/queries/clientpositive/join_star.q (revision 0) @@ -0,0 +1,54 @@ +create table fact(m1 int, m2 int, d1 int, d2 int); +create table dim1(f1 int, f2 int); +create table dim2(f3 int, f4 int); +create table dim3(f5 int, f6 int); +create table dim4(f7 int, f8 int); +create table dim5(f9 int, f10 int); +create table dim6(f11 int, f12 int); +create table dim7(f13 int, f14 int); + +LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact; +LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1; +LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2; +LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3; +LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim4; +LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim5; +LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim6; +LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim7; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=5000; + +explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1; +select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1; + +explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3; +select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3; + +explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3; +select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3; + +explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3; +select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3; + +explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14 + from fact + Left outer join dim1 on fact.d1= dim1.f1 + Left outer join dim2 on dim1.f2 = dim2.f3 + Left outer Join dim3 on fact.d2= dim3.f5 + Left outer Join dim4 on dim3.f6= dim4.f7 + Left outer join dim5 on dim4.f8= dim5.f9 + Left outer Join dim6 on dim3.f6= dim6.f11 + Left outer Join dim7 on dim6.f12 = dim7.f13; + +Select m1, m2, f2, f4, f6, f8, f10, f12, f14 + from fact + Left outer join dim1 on fact.d1= dim1.f1 + Left outer join dim2 on dim1.f2 = dim2.f3 + Left outer Join dim3 on fact.d2= dim3.f5 + Left outer Join dim4 on dim3.f6= dim4.f7 + Left outer join dim5 on dim4.f8= dim5.f9 + Left outer Join dim6 on dim3.f6= dim6.f11 + Left outer Join dim7 on dim6.f12 = dim7.f13; + Index: ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java =================================================================== --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java (revision 1448522) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java (working copy) @@ -57,6 +57,7 @@ private static final long serialVersionUID = 1L; private static final Log LOG = LogFactory.getLog(HashTableSinkOperator.class.getName()); + protected MapJoinMetaData metadata = new MapJoinMetaData(); // from abstract map join operator /** * The expressions for join inputs's join keys. @@ -161,6 +162,10 @@ } + public MapJoinMetaData getMetadata() { + return metadata; + } + private static final transient String[] FATAL_ERR_MSG = { null, // counter value 0 means no error "Mapside join exceeds available memory. " @@ -292,8 +297,8 @@ null); keySerializer.initialize(null, keyTableDesc.getProperties()); - MapJoinMetaData.clear(); - MapJoinMetaData.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx( + metadata.clear(); + metadata.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx( ObjectInspectorUtils.getStandardObjectInspector(keySerializer.getObjectInspector(), ObjectInspectorCopyOption.WRITABLE), keySerializer, keyTableDesc, hconf)); } @@ -314,7 +319,7 @@ // compute keys and values as StandardObjects AbstractMapJoinKey keyMap = JoinUtil.computeMapJoinKeys(row, joinKeys.get(alias), - joinKeysObjectInspectors.get(alias)); + joinKeysObjectInspectors.get(alias), metadata); Object[] value = JoinUtil.computeMapJoinValues(row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), joinFilterObjectInspectors @@ -338,7 +343,8 @@ // Construct externalizable objects for key and value if (needNewKey) { - MapJoinObjectValue valueObj = new MapJoinObjectValue(metadataValueTag[tag], res); + MapJoinObjectValue valueObj = new MapJoinObjectValue( + metadataValueTag[tag], res, metadata); rowNumber++; if (rowNumber > hashTableScale && rowNumber % hashTableScale == 0) { @@ -379,7 +385,7 @@ StandardStructObjectInspector standardOI = ObjectInspectorFactory .getStandardStructObjectInspector(newNames, newFields); - MapJoinMetaData.put(Integer.valueOf(metadataValueTag[tag]), new HashTableSinkObjectCtx( + metadata.put(Integer.valueOf(metadataValueTag[tag]), new HashTableSinkObjectCtx( standardOI, valueSerDe, valueTableDesc, hconf)); } @@ -422,7 +428,7 @@ super.closeOp(abort); } catch (Exception e) { - LOG.error("Generate Hashtable error"); + LOG.error("Generate Hashtable error", e); e.printStackTrace(); } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java (revision 1448522) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java (working copy) @@ -19,7 +19,6 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -37,15 +36,15 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.util.ReflectionUtils; @@ -157,7 +156,8 @@ * standard ObjectInspector. */ public static AbstractMapJoinKey computeMapJoinKeys(Object row, - List keyFields, List keyFieldsOI) + List keyFields, List keyFieldsOI, + MapJoinMetaData metadata) throws HiveException { int size = keyFields.size(); @@ -165,7 +165,7 @@ Object obj = (ObjectInspectorUtils.copyToStandardObject(keyFields.get(0) .evaluate(row), keyFieldsOI.get(0), ObjectInspectorCopyOption.WRITABLE)); - MapJoinSingleKey key = new MapJoinSingleKey(obj); + MapJoinSingleKey key = new MapJoinSingleKey(obj, metadata); return key; }else if(size == 2){ Object obj1 = (ObjectInspectorUtils.copyToStandardObject(keyFields.get(0) @@ -176,7 +176,7 @@ .evaluate(row), keyFieldsOI.get(1), ObjectInspectorCopyOption.WRITABLE)); - MapJoinDoubleKeys key = new MapJoinDoubleKeys(obj1,obj2); + MapJoinDoubleKeys key = new MapJoinDoubleKeys(obj1,obj2, metadata); return key; }else{ // Compute the keys @@ -187,7 +187,7 @@ .evaluate(row), keyFieldsOI.get(i), ObjectInspectorCopyOption.WRITABLE)); } - MapJoinObjectKey key = new MapJoinObjectKey(nr); + MapJoinObjectKey key = new MapJoinObjectKey(nr, metadata); return key; } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (revision 1448522) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (working copy) @@ -24,8 +24,9 @@ import java.io.ObjectOutput; import java.util.ArrayList; +import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx; import org.apache.hadoop.hive.ql.exec.MapJoinMetaData; -import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -39,19 +40,18 @@ protected transient int metadataTag; protected transient MapJoinRowContainer obj; + protected MapJoinMetaData metadata; - - public MapJoinObjectValue() { - } - /** * @param metadataTag * @param obj */ - public MapJoinObjectValue(int metadataTag, MapJoinRowContainer obj) { + public MapJoinObjectValue(int metadataTag, MapJoinRowContainer obj, + MapJoinMetaData metadata) { this.metadataTag = metadataTag; + this.metadata = metadata; this.obj = obj; } @@ -84,7 +84,7 @@ metadataTag = in.readInt(); // get the tableDesc from the map stored in the mapjoin operator - HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag)); + HashTableSinkObjectCtx ctx = MapJoinOperator.getMetadata().get(Integer.valueOf(metadataTag)); int sz = in.readInt(); MapJoinRowContainer res = new MapJoinRowContainer(); @@ -124,7 +124,7 @@ out.writeInt(metadataTag); // get the tableDesc from the map stored in the mapjoin operator - HashTableSinkObjectCtx ctx = 
MapJoinMetaData.get(Integer.valueOf(metadataTag)); + HashTableSinkObjectCtx ctx = metadata.get(Integer.valueOf(metadataTag)); // Different processing for key and value MapJoinRowContainer v = obj; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinDoubleKeys.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinDoubleKeys.java (revision 1448522) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinDoubleKeys.java (working copy) @@ -23,8 +23,9 @@ import java.io.ObjectOutput; import java.util.ArrayList; +import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx; import org.apache.hadoop.hive.ql.exec.MapJoinMetaData; -import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; @@ -35,15 +36,14 @@ protected transient Object obj1; protected transient Object obj2; - public MapJoinDoubleKeys() { } - /** * @param obj1 * @param obj2 */ - public MapJoinDoubleKeys(Object obj1, Object obj2) { + public MapJoinDoubleKeys(Object obj1, Object obj2, MapJoinMetaData metadata) { + super(metadata); this.obj1 = obj1; this.obj2 = obj2; } @@ -93,7 +93,7 @@ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { try { // get the tableDesc from the map stored in the mapjoin operator - HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag)); + HashTableSinkObjectCtx ctx = MapJoinOperator.getMetadata().get(Integer.valueOf(metadataTag)); Writable val = ctx.getSerDe().getSerializedClass().newInstance(); val.readFields(in); @@ -124,7 +124,7 @@ try { // out.writeInt(metadataTag); // get the tableDesc from the map stored in the mapjoin operator - HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag)); + HashTableSinkObjectCtx ctx = getMetadata().get(Integer.valueOf(metadataTag)); ArrayList list = MapJoinMetaData.getList(); list.add(obj1); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinSingleKey.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinSingleKey.java (revision 1448522) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinSingleKey.java (working copy) @@ -23,8 +23,9 @@ import java.io.ObjectOutput; import java.util.ArrayList; -import org.apache.hadoop.hive.ql.exec.MapJoinMetaData; import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx; +import org.apache.hadoop.hive.ql.exec.MapJoinMetaData; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; @@ -37,11 +38,8 @@ public MapJoinSingleKey() { } - - /** - * @param obj - */ - public MapJoinSingleKey(Object obj) { + public MapJoinSingleKey(Object obj, MapJoinMetaData metadata) { + super(metadata); this.obj = obj; } @@ -77,7 +75,7 @@ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { try { // get the tableDesc from the map stored in the mapjoin 
operator - HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag)); + HashTableSinkObjectCtx ctx = MapJoinOperator.getMetadata().get(Integer.valueOf(metadataTag)); Writable val = ctx.getSerDe().getSerializedClass().newInstance(); val.readFields(in); @@ -106,7 +104,7 @@ try { // out.writeInt(metadataTag); // get the tableDesc from the map stored in the mapjoin operator - HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag)); + HashTableSinkObjectCtx ctx = getMetadata().get(Integer.valueOf(metadataTag)); ArrayList list = MapJoinMetaData.getList(); list.add(obj); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractMapJoinKey.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractMapJoinKey.java (revision 1448522) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/AbstractMapJoinKey.java (working copy) @@ -23,16 +23,27 @@ import java.io.ObjectInput; import java.io.ObjectOutput; +import org.apache.hadoop.hive.ql.exec.MapJoinMetaData; + /** * Map Join Object used for both key. */ public abstract class AbstractMapJoinKey implements Externalizable { protected static int metadataTag = -1; + protected MapJoinMetaData metadata; public AbstractMapJoinKey() { } + public AbstractMapJoinKey(MapJoinMetaData metadata) { + this.metadata = metadata; + } + + protected MapJoinMetaData getMetadata() { + return metadata; + } + @Override public abstract boolean equals(Object o); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java (revision 1448522) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java (working copy) @@ -23,8 +23,9 @@ import java.io.ObjectOutput; import java.util.ArrayList; +import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx; import org.apache.hadoop.hive.ql.exec.MapJoinMetaData; -import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; @@ -44,7 +45,8 @@ /** * @param obj */ - public MapJoinObjectKey(Object[] obj) { + public MapJoinObjectKey(Object[] obj, MapJoinMetaData metadata) { + super(metadata); this.obj = obj; } @@ -95,7 +97,7 @@ ClassNotFoundException { try { // get the tableDesc from the map stored in the mapjoin operator - HashTableSinkObjectCtx ctx = MapJoinMetaData.get( + HashTableSinkObjectCtx ctx = MapJoinOperator.getMetadata().get( Integer.valueOf(metadataTag)); Writable val = ctx.getSerDe().getSerializedClass().newInstance(); @@ -119,7 +121,7 @@ public void writeExternal(ObjectOutput out) throws IOException { try { // get the tableDesc from the map stored in the mapjoin operator - HashTableSinkObjectCtx ctx = MapJoinMetaData.get( + HashTableSinkObjectCtx ctx = getMetadata().get( Integer.valueOf(metadataTag)); // Different processing for key and value Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (revision 1448522) +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (working copy) @@ -55,6 +55,11 @@ protected transient Map> mapJoinTables; + protected static MapJoinMetaData metadata = new MapJoinMetaData(); + public static MapJoinMetaData getMetadata() { + return metadata; + } + private static final transient String[] FATAL_ERR_MSG = { null, // counter value 0 means no error "Mapside join exceeds available memory. " @@ -115,7 +120,7 @@ SerDe keySerializer = (SerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null); keySerializer.initialize(null, keyTableDesc.getProperties()); - MapJoinMetaData.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx( + metadata.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx( ObjectInspectorUtils.getStandardObjectInspector(keySerializer.getObjectInspector(), ObjectInspectorCopyOption.WRITABLE), keySerializer, keyTableDesc, hconf)); @@ -133,7 +138,7 @@ null); valueSerDe.initialize(null, valueTableDesc.getProperties()); - MapJoinMetaData.put(Integer.valueOf(pos), new HashTableSinkObjectCtx(ObjectInspectorUtils + metadata.put(Integer.valueOf(pos), new HashTableSinkObjectCtx(ObjectInspectorUtils .getStandardObjectInspector(valueSerDe.getObjectInspector(), ObjectInspectorCopyOption.WRITABLE), valueSerDe, valueTableDesc, hconf)); } @@ -185,8 +190,8 @@ hashtable.initilizePersistentHash(path.toUri().getPath()); } } catch (Exception e) { - LOG.error("Load Distributed Cache Error"); - throw new HiveException(e.getMessage()); + LOG.error("Load Distributed Cache Error", e); + throw new HiveException(e); } } @@ -225,7 +230,7 @@ // compute keys and values as StandardObjects AbstractMapJoinKey key = JoinUtil.computeMapJoinKeys(row, joinKeys.get(alias), - joinKeysObjectInspectors.get(alias)); + joinKeysObjectInspectors.get(alias), metadata); ArrayList value = JoinUtil.computeValues(row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), joinFilterObjectInspectors .get(alias), filterMap == null ? null : filterMap[alias]); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinMetaData.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinMetaData.java (revision 1448522) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinMetaData.java (working copy) @@ -21,23 +21,26 @@ import java.util.HashMap; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx; public class MapJoinMetaData { - static transient Map mapMetadata = new HashMap(); + transient Map mapMetadata = new HashMap(); static ArrayList list = new ArrayList(); + private static final Log LOG = LogFactory.getLog(MapJoinMetaData.class.getName()); public MapJoinMetaData(){ } - public static void put(Integer key, HashTableSinkObjectCtx value){ + public void put(Integer key, HashTableSinkObjectCtx value){ mapMetadata.put(key, value); } - public static HashTableSinkObjectCtx get(Integer key){ + public HashTableSinkObjectCtx get(Integer key){ return mapMetadata.get(key); } - public static void clear(){ + public void clear(){ mapMetadata.clear(); } @@ -45,5 +48,4 @@ list.clear(); return list; } - }
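
The core change in the Java hunks above is replacing the process-wide static MapJoinMetaData map with a per-operator instance that is handed to each map-join key/value object when it is constructed, so serialization contexts are no longer shared mutable static state. The sketch below is a minimal, self-contained illustration of that pattern only; the names (MetadataRegistry, JoinKey, describeForWrite, the String "context" payload) are simplified stand-ins invented for this example and are not the actual Hive classes or signatures.

import java.util.HashMap;
import java.util.Map;

// Minimal sketch of the instance-scoped metadata pattern used by the patch.
// Not Hive code: class and method names here are illustrative stand-ins.
public class MetadataRegistrySketch {

  /** Per-instance registry keyed by tag (stands in for MapJoinMetaData). */
  static class MetadataRegistry {
    private final Map<Integer, String> contexts = new HashMap<Integer, String>();

    void put(Integer tag, String ctx) { contexts.put(tag, ctx); }
    String get(Integer tag)          { return contexts.get(tag); }
    void clear()                     { contexts.clear(); }
  }

  /**
   * Stands in for a map-join key: it keeps a reference to the registry of the
   * operator that built it, so the write path needs no static lookup.
   */
  static class JoinKey {
    private final Object key;
    private final int tag;
    private final MetadataRegistry metadata;

    JoinKey(Object key, int tag, MetadataRegistry metadata) {
      this.key = key;
      this.tag = tag;
      this.metadata = metadata;
    }

    /** Write side: resolve the serialization context from the instance registry. */
    String describeForWrite() {
      return "serialize " + key + " with " + metadata.get(tag);
    }
  }

  public static void main(String[] args) {
    // Each operator owns its own registry, so two map-join stages running in
    // the same JVM no longer overwrite each other's entries the way a shared
    // static map could.
    MetadataRegistry sinkSideMetadata = new MetadataRegistry();
    sinkSideMetadata.put(0, "key-serde-context");
    sinkSideMetadata.put(1, "value-serde-context");

    JoinKey key = new JoinKey(42, 0, sinkSideMetadata);
    System.out.println(key.describeForWrite());
  }
}

Note the asymmetry the patch itself keeps: writeExternal in the key/value classes uses the metadata instance passed in at construction, while readExternal still resolves the context through the static MapJoinOperator.getMetadata(), since a freshly deserialized object has no operator-scoped registry of its own yet.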