Index: data/files/fact-data.txt
===================================================================
--- data/files/fact-data.txt	(revision 0)
+++ data/files/fact-data.txt	(revision 0)
@@ -0,0 +1,10 @@
+1212
+111212
+212212
+313212
+414212
+515234
+616234
+717234
+818234
+919234
Index: data/files/dim-data.txt
===================================================================
--- data/files/dim-data.txt	(revision 0)
+++ data/files/dim-data.txt	(revision 0)
@@ -0,0 +1,4 @@
+11
+22
+33
+44
Index: ql/src/test/results/clientpositive/join_star.q.out
===================================================================
--- ql/src/test/results/clientpositive/join_star.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/join_star.q.out	(revision 0)
@@ -0,0 +1,864 @@
+PREHOOK: query: create table fact(m1 int, m2 int, d1 int, d2 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table fact(m1 int, m2 int, d1 int, d2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@fact
+PREHOOK: query: create table dim1(f1 int, f2 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dim1(f1 int, f2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dim1
+PREHOOK: query: create table dim2(f3 int, f4 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dim2(f3 int, f4 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dim2
+PREHOOK: query: create table dim3(f5 int, f6 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dim3(f5 int, f6 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dim3
+PREHOOK: query: create table dim4(f7 int, f8 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dim4(f7 int, f8 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dim4
+PREHOOK: query: create table dim5(f9 int, f10 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dim5(f9 int, f10 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dim5
+PREHOOK: query: create table dim6(f11 int, f12 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dim6(f11 int, f12 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dim6
+PREHOOK: query: create table dim7(f13 int, f14 int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dim7(f13 int, f14 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dim7
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact
+PREHOOK: type: LOAD
+PREHOOK: Output: default@fact
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@fact
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dim1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dim1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dim2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dim2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dim3
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dim3
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim4
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dim4
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim4
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dim4
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim5
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dim5
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim5
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dim5
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim6
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dim6
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim6
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dim6
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim7
+PREHOOK: type: LOAD
+PREHOOK: Output: default@dim7
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim7
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@dim7
+PREHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)))))
+
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        dim1
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            HashTable Sink Operator
+              condition expressions:
+                0 {m1} {m2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              Position of Big Table: 0
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        fact
+          TableScan
+            alias: fact
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {m1} {m2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              outputColumnNames: _col0, _col1, _col7
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: int
+                      expr: _col7
+                      type: int
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dim1
+PREHOOK: Input: default@fact
+#### A masked pattern was here ####
+POSTHOOK: query: select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dim1
+POSTHOOK: Input: default@fact
+#### A masked pattern was here ####
+1 2 1
+11 12 1
+21 22 1
+31 32 1
+41 42 1
+51 52 3
+61 62 3
+71 72 3
+81 82 3
+91 92 3
+PREHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL fact) d2) (. (TOK_TABLE_OR_COL dim2) f3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4)))))
+
+STAGE DEPENDENCIES:
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        dim1
+          Fetch Operator
+            limit: -1
+        dim2
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            HashTable Sink Operator
+              condition expressions:
+                0 {m1} {m2} {d2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              Position of Big Table: 0
+        dim2
+          TableScan
+            alias: dim2
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col7} {_col0} {_col1}
+                1 {f4}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col3]]
+                1 [Column[f3]]
+              Position of Big Table: 0
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+        fact
+          TableScan
+            alias: fact
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {m1} {m2} {d2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              outputColumnNames: _col0, _col1, _col3, _col7
+              Position of Big Table: 0
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {_col7} {_col0} {_col1}
+                  1 {f4}
+                handleSkewJoin: false
+                keys:
+                  0 [Column[_col3]]
+                  1 [Column[f3]]
+                outputColumnNames: _col1, _col4, _col5, _col11
+                Position of Big Table: 0
+                Select Operator
+                  expressions:
+                        expr: _col4
+                        type: int
+                        expr: _col5
+                        type: int
+                        expr: _col1
+                        type: int
+                        expr: _col11
+                        type: int
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dim1
+PREHOOK: Input: default@dim2
+PREHOOK: Input: default@fact
+#### A masked pattern was here ####
+POSTHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dim1
+POSTHOOK: Input: default@dim2
+POSTHOOK: Input: default@fact
+#### A masked pattern was here ####
+1 2 1 2
+11 12 1 2
+21 22 1 2
+31 32 1 2
+41 42 1 2
+51 52 3 4
+61 62 3 4
+71 72 3 4
+81 82 3 4
+91 92 3 4
+PREHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL dim1) f2) (. (TOK_TABLE_OR_COL dim2) f3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4)))))
+
+STAGE DEPENDENCIES:
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        dim1
+          Fetch Operator
+            limit: -1
+        dim2
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            HashTable Sink Operator
+              condition expressions:
+                0 {m1} {m2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              Position of Big Table: 0
+        dim2
+          TableScan
+            alias: dim2
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col7} {_col0} {_col1}
+                1 {f4}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col7]]
+                1 [Column[f3]]
+              Position of Big Table: 0
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+        fact
+          TableScan
+            alias: fact
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {m1} {m2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              outputColumnNames: _col0, _col1, _col7
+              Position of Big Table: 0
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {_col7} {_col0} {_col1}
+                  1 {f4}
+                handleSkewJoin: false
+                keys:
+                  0 [Column[_col7]]
+                  1 [Column[f3]]
+                outputColumnNames: _col1, _col4, _col5, _col11
+                Position of Big Table: 0
+                Select Operator
+                  expressions:
+                        expr: _col4
+                        type: int
+                        expr: _col5
+                        type: int
+                        expr: _col1
+                        type: int
+                        expr: _col11
+                        type: int
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dim1
+PREHOOK: Input: default@dim2
+PREHOOK: Input: default@fact
+#### A masked pattern was here ####
+POSTHOOK: query: select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dim1
+POSTHOOK: Input: default@dim2
+POSTHOOK: Input: default@fact
+#### A masked pattern was here ####
+1 2 1 1
+11 12 1 1
+21 22 1 1
+31 32 1 1
+41 42 1 1
+51 52 3 3
+61 62 3 3
+71 72 3 3
+81 82 3 3
+91 92 3 3
+PREHOOK: query: explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL dim1) f2) (. (TOK_TABLE_OR_COL dim2) f3)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4)))))
+
+STAGE DEPENDENCIES:
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        dim1
+          Fetch Operator
+            limit: -1
+        dim2
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            HashTable Sink Operator
+              condition expressions:
+                0 {m1} {m2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              Position of Big Table: 0
+        dim2
+          TableScan
+            alias: dim2
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col7} {_col0} {_col1}
+                1 {f4}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col7]]
+                1 [Column[f3]]
+              Position of Big Table: 0
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+        fact
+          TableScan
+            alias: fact
+            Map Join Operator
+              condition map:
+                   Left Outer Join0 to 1
+              condition expressions:
+                0 {m1} {m2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              outputColumnNames: _col0, _col1, _col7
+              Position of Big Table: 0
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                condition expressions:
+                  0 {_col7} {_col0} {_col1}
+                  1 {f4}
+                handleSkewJoin: false
+                keys:
+                  0 [Column[_col7]]
+                  1 [Column[f3]]
+                outputColumnNames: _col1, _col4, _col5, _col11
+                Position of Big Table: 0
+                Select Operator
+                  expressions:
+                        expr: _col4
+                        type: int
+                        expr: _col5
+                        type: int
+                        expr: _col1
+                        type: int
+                        expr: _col11
+                        type: int
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dim1
+PREHOOK: Input: default@dim2
+PREHOOK: Input: default@fact
+#### A masked pattern was here ####
+POSTHOOK: query: select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dim1
+POSTHOOK: Input: default@dim2
+POSTHOOK: Input: default@fact
+#### A masked pattern was here ####
+1 2 1 1
+11 12 1 1
+21 22 1 1
+31 32 1 1
+41 42 1 1
+51 52 3 3
+61 62 3 3
+71 72 3 3
+81 82 3 3
+91 92 3 3
+PREHOOK: query: explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14
+ from fact
+ Left outer join dim1 on fact.d1= dim1.f1
+ Left outer join dim2 on dim1.f2 = dim2.f3
+ Left outer Join dim3 on fact.d2= dim3.f5
+ Left outer Join dim4 on dim3.f6= dim4.f7
+ Left outer join dim5 on dim4.f8= dim5.f9
+ Left outer Join dim6 on dim3.f6= dim6.f11
+ Left outer Join dim7 on dim6.f12 = dim7.f13
+PREHOOK: type: QUERY
+POSTHOOK: query: explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14
+ from fact
+ Left outer join dim1 on fact.d1= dim1.f1
+ Left outer join dim2 on dim1.f2 = dim2.f3
+ Left outer Join dim3 on fact.d2= dim3.f5
+ Left outer Join dim4 on dim3.f6= dim4.f7
+ Left outer join dim5 on dim4.f8= dim5.f9
+ Left outer Join dim6 on dim3.f6= dim6.f11
+ Left outer Join dim7 on dim6.f12 = dim7.f13
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME fact)) (TOK_TABREF (TOK_TABNAME dim1)) (= (. (TOK_TABLE_OR_COL fact) d1) (. (TOK_TABLE_OR_COL dim1) f1))) (TOK_TABREF (TOK_TABNAME dim2)) (= (. (TOK_TABLE_OR_COL dim1) f2) (. (TOK_TABLE_OR_COL dim2) f3))) (TOK_TABREF (TOK_TABNAME dim3)) (= (. (TOK_TABLE_OR_COL fact) d2) (. (TOK_TABLE_OR_COL dim3) f5))) (TOK_TABREF (TOK_TABNAME dim4)) (= (. (TOK_TABLE_OR_COL dim3) f6) (. (TOK_TABLE_OR_COL dim4) f7))) (TOK_TABREF (TOK_TABNAME dim5)) (= (. (TOK_TABLE_OR_COL dim4) f8) (. (TOK_TABLE_OR_COL dim5) f9))) (TOK_TABREF (TOK_TABNAME dim6)) (= (. (TOK_TABLE_OR_COL dim3) f6) (. (TOK_TABLE_OR_COL dim6) f11))) (TOK_TABREF (TOK_TABNAME dim7)) (= (. (TOK_TABLE_OR_COL dim6) f12) (. (TOK_TABLE_OR_COL dim7) f13)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1)) (TOK_SELEXPR (TOK_TABLE_OR_COL m2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_TABLE_OR_COL f4)) (TOK_SELEXPR (TOK_TABLE_OR_COL f6)) (TOK_SELEXPR (TOK_TABLE_OR_COL f8)) (TOK_SELEXPR (TOK_TABLE_OR_COL f10)) (TOK_SELEXPR (TOK_TABLE_OR_COL f12)) (TOK_SELEXPR (TOK_TABLE_OR_COL f14)))))
+
+STAGE DEPENDENCIES:
+  Stage-18 is a root stage
+  Stage-17 depends on stages: Stage-18
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-18
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        dim1
+          Fetch Operator
+            limit: -1
+        dim2
+          Fetch Operator
+            limit: -1
+        dim3
+          Fetch Operator
+            limit: -1
+        dim4
+          Fetch Operator
+            limit: -1
+        dim5
+          Fetch Operator
+            limit: -1
+        dim6
+          Fetch Operator
+            limit: -1
+        dim7
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        dim1
+          TableScan
+            alias: dim1
+            HashTable Sink Operator
+              condition expressions:
+                0 {m1} {m2} {d2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              Position of Big Table: 0
+        dim2
+          TableScan
+            alias: dim2
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col7} {_col0} {_col1} {_col3}
+                1 {f4}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col7]]
+                1 [Column[f3]]
+              Position of Big Table: 0
+        dim3
+          TableScan
+            alias: dim3
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col11} {_col1} {_col4} {_col5}
+                1 {f6}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col7]]
+                1 [Column[f5]]
+              Position of Big Table: 0
+        dim4
+          TableScan
+            alias: dim4
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col15} {_col1} {_col5} {_col8} {_col9}
+                1 {f8}
+                2 {f12}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col15]]
+                1 [Column[f7]]
+                2 [Column[f11]]
+              Position of Big Table: 0
+        dim5
+          TableScan
+            alias: dim5
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col1} {_col5} {_col9} {_col12} {_col13} {_col23} {_col19}
+                1 {f10}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col19]]
+                1 [Column[f9]]
+              Position of Big Table: 0
+        dim6
+          TableScan
+            alias: dim6
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col15} {_col1} {_col5} {_col8} {_col9}
+                1 {f8}
+                2 {f12}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col15]]
+                1 [Column[f7]]
+                2 [Column[f11]]
+              Position of Big Table: 0
+        dim7
+          TableScan
+            alias: dim7
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col1} {_col5} {_col9} {_col12} {_col13} {_col19} {_col27} {_col23}
+                1 {f14}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col19]]
+                1 [Column[f13]]
+              Position of Big Table: 0
+
+  Stage: Stage-17
+    Map Reduce
+      Alias -> Map Operator Tree:
+        fact
+          TableScan
+            alias: fact
+            Map Join Operator
+              condition map:
+                   Left Outer Join0 to 1
+              condition expressions:
+                0 {m1} {m2} {d2}
+                1 {f2}
+              handleSkewJoin: false
+              keys:
+                0 [Column[d1]]
+                1 [Column[f1]]
+              outputColumnNames: _col0, _col1, _col3, _col7
+              Position of Big Table: 0
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                condition expressions:
+                  0 {_col7} {_col0} {_col1} {_col3}
+                  1 {f4}
+                handleSkewJoin: false
+                keys:
+                  0 [Column[_col7]]
+                  1 [Column[f3]]
+                outputColumnNames: _col1, _col4, _col5, _col7, _col11
+                Position of Big Table: 0
+                Map Join Operator
+                  condition map:
+                       Left Outer Join0 to 1
+                  condition expressions:
+                    0 {_col11} {_col1} {_col4} {_col5}
+                    1 {f6}
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[_col7]]
+                    1 [Column[f5]]
+                  outputColumnNames: _col1, _col5, _col8, _col9, _col15
+                  Position of Big Table: 0
+                  Map Join Operator
+                    condition map:
+                         Left Outer Join0 to 1
+                         Left Outer Join0 to 2
+                    condition expressions:
+                      0 {_col15} {_col1} {_col5} {_col8} {_col9}
+                      1 {f8}
+                      2 {f12}
+                    handleSkewJoin: false
+                    keys:
+                      0 [Column[_col15]]
+                      1 [Column[f7]]
+                      2 [Column[f11]]
+                    outputColumnNames: _col1, _col5, _col9, _col12, _col13, _col19, _col23
+                    Position of Big Table: 0
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      condition expressions:
+                        0 {_col1} {_col5} {_col9} {_col12} {_col13} {_col23} {_col19}
+                        1 {f10}
+                      handleSkewJoin: false
+                      keys:
+                        0 [Column[_col19]]
+                        1 [Column[f9]]
+                      outputColumnNames: _col1, _col5, _col9, _col12, _col13, _col19, _col23, _col27
+                      Position of Big Table: 0
+                      Map Join Operator
+                        condition map:
+                             Left Outer Join0 to 1
+                        condition expressions:
+                          0 {_col1} {_col5} {_col9} {_col12} {_col13} {_col19} {_col27} {_col23}
+                          1 {f14}
+                        handleSkewJoin: false
+                        keys:
+                          0 [Column[_col19]]
+                          1 [Column[f13]]
+                        outputColumnNames: _col1, _col5, _col9, _col12, _col13, _col19, _col23, _col27, _col31
+                        Position of Big Table: 0
+                        Select Operator
+                          expressions:
+                                expr: _col12
+                                type: int
+                                expr: _col13
+                                type: int
+                                expr: _col9
+                                type: int
+                                expr: _col5
+                                type: int
+                                expr: _col1
+                                type: int
+                                expr: _col27
+                                type: int
+                                expr: _col23
+                                type: int
+                                expr: _col19
+                                type: int
+                                expr: _col31
+                                type: int
+                          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                          File Output Operator
+                            compressed: false
+                            GlobalTableId: 0
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: Select m1, m2, f2, f4, f6, f8, f10, f12, f14
+ from fact
+ Left outer join dim1 on fact.d1= dim1.f1
+ Left outer join dim2 on dim1.f2 = dim2.f3
+ Left outer Join dim3 on fact.d2= dim3.f5
+ Left outer Join dim4 on dim3.f6= dim4.f7
+ Left outer join dim5 on dim4.f8= dim5.f9
+ Left outer Join dim6 on dim3.f6= dim6.f11
+ Left outer Join dim7 on dim6.f12 = dim7.f13
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dim1
+PREHOOK: Input: default@dim2
+PREHOOK: Input: default@dim3
+PREHOOK: Input: default@dim4
+PREHOOK: Input: default@dim5
+PREHOOK: Input: default@dim6
+PREHOOK: Input: default@dim7
+PREHOOK: Input: default@fact
+#### A masked pattern was here ####
+POSTHOOK: query: Select m1, m2, f2, f4, f6, f8, f10, f12, f14
+ from fact
+ Left outer join dim1 on fact.d1= dim1.f1
+ Left outer join dim2 on dim1.f2 = dim2.f3
+ Left outer Join dim3 on fact.d2= dim3.f5
+ Left outer Join dim4 on dim3.f6= dim4.f7
+ Left outer join dim5 on dim4.f8= dim5.f9
+ Left outer Join dim6 on dim3.f6= dim6.f11
+ Left outer Join dim7 on dim6.f12 = dim7.f13
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dim1
+POSTHOOK: Input: default@dim2
+POSTHOOK: Input: default@dim3
+POSTHOOK: Input: default@dim4
+POSTHOOK: Input: default@dim5
+POSTHOOK: Input: default@dim6
+POSTHOOK: Input: default@dim7
+POSTHOOK: Input: default@fact
+#### A masked pattern was here ####
+1 2 1 1 2 2 2 2 2
+11 12 1 1 2 2 2 2 2
+21 22 1 1 2 2 2 2 2
+31 32 1 1 2 2 2 2 2
+41 42 1 1 2 2 2 2 2
+51 52 3 3 4 4 4 4 4
+61 62 3 3 4 4 4 4 4
+71 72 3 3 4 4 4 4 4
+81 82 3 3 4 4 4 4 4
+91 92 3 3 4 4 4 4 4
Index: ql/src/test/queries/clientpositive/join_star.q
===================================================================
--- ql/src/test/queries/clientpositive/join_star.q	(revision 0)
+++ ql/src/test/queries/clientpositive/join_star.q	(revision 0)
@@ -0,0 +1,54 @@
+create table fact(m1 int, m2 int, d1 int, d2 int);
+create table dim1(f1 int, f2 int);
+create table dim2(f3 int, f4 int);
+create table dim3(f5 int, f6 int);
+create table dim4(f7 int, f8 int);
+create table dim5(f9 int, f10 int);
+create table dim6(f11 int, f12 int);
+create table dim7(f13 int, f14 int);
+
+LOAD DATA LOCAL INPATH '../data/files/fact-data.txt' INTO TABLE fact;
+LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim1;
+LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim2;
+LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim3;
+LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim4;
+LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim5;
+LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim6;
+LOAD DATA LOCAL INPATH '../data/files/dim-data.txt' INTO TABLE dim7;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=5000;
+
+explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1;
+select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1;
+
+explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3;
+select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3;
+
+explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3;
+select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3;
+
+explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3;
+select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3;
+
+explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14
+ from fact
+ Left outer join dim1 on fact.d1= dim1.f1
+ Left outer join dim2 on dim1.f2 = dim2.f3
+ Left outer Join dim3 on fact.d2= dim3.f5
+ Left outer Join dim4 on dim3.f6= dim4.f7
+ Left outer join dim5 on dim4.f8= dim5.f9
+ Left outer Join dim6 on dim3.f6= dim6.f11
+ Left outer Join dim7 on dim6.f12 = dim7.f13;
+
+Select m1, m2, f2, f4, f6, f8, f10, f12, f14
+ from fact
+ Left outer join dim1 on fact.d1= dim1.f1
+ Left outer join dim2 on dim1.f2 = dim2.f3
+ Left outer Join dim3 on fact.d2= dim3.f5
+ Left outer Join dim4 on dim3.f6= dim4.f7
+ Left outer join dim5 on dim4.f8= dim5.f9
+ Left outer Join dim6 on dim3.f6= dim6.f11
+ Left outer Join dim7 on dim6.f12 = dim7.f13;
+
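Note on the Java changes that follow: the patch turns MapJoinMetaData's map into instance state and gives MapJoinOperator and HashTableSinkOperator each their own registry behind a static getMetadata() accessor, so the metadata the hash-table sink writes no longer shares (and its clear() no longer wipes) the map the map-join side reads when both operators run in the same task, as the chained map joins exercised above arrange. Below is a minimal self-contained sketch of that pattern, not Hive API: every class and member name except put/get/clear is hypothetical.

    import java.util.HashMap;
    import java.util.Map;

    // Stand-in for HashTableSinkObjectCtx: the per-tag serialization context.
    class Ctx {
        final String desc;
        Ctx(String desc) { this.desc = desc; }
    }

    // Instance-based registry, mirroring the patched MapJoinMetaData:
    // put/get/clear operate on this instance's map, not on class-wide state.
    class MetaDataRegistry {
        private final Map<Integer, Ctx> mapMetadata = new HashMap<Integer, Ctx>();
        public void put(Integer key, Ctx value) { mapMetadata.put(key, value); }
        public Ctx get(Integer key) { return mapMetadata.get(key); }
        public void clear() { mapMetadata.clear(); }
    }

    // Each operator class owns one registry behind a static accessor, the way
    // the patch adds MapJoinOperator.getMetadata() and
    // HashTableSinkOperator.getMetadata().
    class ReaderSide {  // plays the role of MapJoinOperator
        static final MetaDataRegistry metadata = new MetaDataRegistry();
        static MetaDataRegistry getMetadata() { return metadata; }
    }

    class WriterSide {  // plays the role of HashTableSinkOperator
        static final MetaDataRegistry metadata = new MetaDataRegistry();
        static MetaDataRegistry getMetadata() { return metadata; }
    }

    public class RegistrySketch {
        public static void main(String[] args) {
            // Both sides register a context under the same tag, as happens when
            // a hash-table sink and a map join execute in one JVM task.
            WriterSide.getMetadata().put(0, new Ctx("writer ctx for tag 0"));
            ReaderSide.getMetadata().put(0, new Ctx("reader ctx for tag 0"));
            // With a single shared static map, the second put (or a clear())
            // would clobber the first; with per-owner registries, both coexist.
            System.out.println(WriterSide.getMetadata().get(0).desc);  // writer ctx for tag 0
            System.out.println(ReaderSide.getMetadata().get(0).desc);  // reader ctx for tag 0
        }
    }

In the patch itself, writeExternal resolves its context through HashTableSinkOperator.getMetadata() while readExternal goes through MapJoinOperator.getMetadata(), matching which side populated each registry.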
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java	(revision 1470815)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java	(working copy)
@@ -54,6 +54,7 @@
   private static final long serialVersionUID = 1L;
   private static final Log LOG = LogFactory.getLog(HashTableSinkOperator.class.getName());
 
+  protected static MapJoinMetaData metadata = new MapJoinMetaData(); // from abstract map join operator
   /**
    * The expressions for join inputs's join keys.
    */
@@ -164,6 +165,10 @@
 
   }
 
+  public static MapJoinMetaData getMetadata() {
+    return metadata;
+  }
+
   private static final transient String[] FATAL_ERR_MSG = {
       null, // counter value 0 means no error
       "Mapside join exceeds available memory. "
@@ -301,8 +306,7 @@
           null);
       keySerializer.initialize(null, keyTableDesc.getProperties());
 
-      MapJoinMetaData.clear();
-      MapJoinMetaData.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx(
+      metadata.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx(
           ObjectInspectorUtils.getStandardObjectInspector(keySerializer.getObjectInspector(),
           ObjectInspectorCopyOption.WRITABLE), keySerializer, keyTableDesc, false, hconf));
     }
@@ -349,7 +353,8 @@
 
       // Construct externalizable objects for key and value
      if (needNewKey) {
-        MapJoinObjectValue valueObj = new MapJoinObjectValue(metadataValueTag[tag], res);
+        MapJoinObjectValue valueObj = new MapJoinObjectValue(
+            metadataValueTag[tag], res);
 
         rowNumber++;
         if (rowNumber > hashTableScale && rowNumber % hashTableScale == 0) {
@@ -391,7 +396,7 @@
           .getStandardStructObjectInspector(newNames, newFields);
 
       int alias = Integer.valueOf(metadataValueTag[tag]);
-      MapJoinMetaData.put(alias, new HashTableSinkObjectCtx(
+      metadata.put(alias, new HashTableSinkObjectCtx(
           standardOI, valueSerDe, valueTableDesc, hasFilter(alias), hconf));
     }
 
@@ -435,7 +440,7 @@
 
       super.closeOp(abort);
     } catch (Exception e) {
-      LOG.error("Generate Hashtable error");
+      LOG.error("Generate Hashtable error", e);
       e.printStackTrace();
     }
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java	(revision 1470815)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java	(working copy)
@@ -24,8 +24,10 @@
 import java.io.ObjectOutput;
 import java.util.ArrayList;
 
+import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator;
+import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
 import org.apache.hadoop.hive.ql.exec.MapJoinMetaData;
-import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
@@ -89,7 +91,8 @@
       metadataTag = in.readInt();
 
       // get the tableDesc from the map stored in the mapjoin operator
-      HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag));
+      HashTableSinkObjectCtx ctx = MapJoinOperator.getMetadata().get(
+          Integer.valueOf(metadataTag));
       int sz = in.readInt();
       MapJoinRowContainer res = new MapJoinRowContainer();
       if (sz > 0) {
@@ -132,7 +135,8 @@
       out.writeInt(metadataTag);
 
       // get the tableDesc from the map stored in the mapjoin operator
-      HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag));
+      HashTableSinkObjectCtx ctx = HashTableSinkOperator.getMetadata().get(
+          Integer.valueOf(metadataTag));
 
       // Different processing for key and value
       MapJoinRowContainer v = obj;
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinDoubleKeys.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinDoubleKeys.java	(revision 1470815)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinDoubleKeys.java	(working copy)
@@ -23,8 +23,10 @@
 import java.io.ObjectOutput;
 import java.util.ArrayList;
 
+import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator;
+import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
 import org.apache.hadoop.hive.ql.exec.MapJoinMetaData;
-import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
@@ -93,7 +95,7 @@
   public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
     try {
       // get the tableDesc from the map stored in the mapjoin operator
-      HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag));
+      HashTableSinkObjectCtx ctx = MapJoinOperator.getMetadata().get(Integer.valueOf(metadataTag));
 
       Writable val = ctx.getSerDe().getSerializedClass().newInstance();
       val.readFields(in);
@@ -124,7 +126,8 @@
     try {
       // out.writeInt(metadataTag);
       // get the tableDesc from the map stored in the mapjoin operator
-      HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag));
+      HashTableSinkObjectCtx ctx = HashTableSinkOperator.getMetadata().get(
+          Integer.valueOf(metadataTag));
 
       ArrayList<Object> list = MapJoinMetaData.getList();
       list.add(obj1);
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinSingleKey.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinSingleKey.java	(revision 1470815)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinSingleKey.java	(working copy)
@@ -23,8 +23,10 @@
 import java.io.ObjectOutput;
 import java.util.ArrayList;
 
+import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator;
+import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
 import org.apache.hadoop.hive.ql.exec.MapJoinMetaData;
-import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
@@ -74,10 +76,12 @@
   }
 
   @Override
-  public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+  public void readExternal(ObjectInput in)
+      throws IOException, ClassNotFoundException {
     try {
       // get the tableDesc from the map stored in the mapjoin operator
-      HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag));
+      HashTableSinkObjectCtx ctx = MapJoinOperator.getMetadata().get(
+          Integer.valueOf(metadataTag));
 
       Writable val = ctx.getSerDe().getSerializedClass().newInstance();
       val.readFields(in);
@@ -106,7 +110,8 @@
     try {
       // out.writeInt(metadataTag);
       // get the tableDesc from the map stored in the mapjoin operator
-      HashTableSinkObjectCtx ctx = MapJoinMetaData.get(Integer.valueOf(metadataTag));
+      HashTableSinkObjectCtx ctx = HashTableSinkOperator.getMetadata().get(
+          Integer.valueOf(metadataTag));
 
       ArrayList<Object> list = MapJoinMetaData.getList();
       list.add(obj);
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java	(revision 1470815)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java	(working copy)
@@ -23,8 +23,9 @@
 import java.io.ObjectOutput;
 import java.util.ArrayList;
 
-import org.apache.hadoop.hive.ql.exec.MapJoinMetaData;
+import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator;
 import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
@@ -95,7 +96,7 @@
       ClassNotFoundException {
     try {
       // get the tableDesc from the map stored in the mapjoin operator
-      HashTableSinkObjectCtx ctx = MapJoinMetaData.get(
+      HashTableSinkObjectCtx ctx = MapJoinOperator.getMetadata().get(
           Integer.valueOf(metadataTag));
 
       Writable val = ctx.getSerDe().getSerializedClass().newInstance();
@@ -119,7 +120,7 @@
   public void writeExternal(ObjectOutput out) throws IOException {
     try {
       // get the tableDesc from the map stored in the mapjoin operator
-      HashTableSinkObjectCtx ctx = MapJoinMetaData.get(
+      HashTableSinkObjectCtx ctx = HashTableSinkOperator.getMetadata().get(
           Integer.valueOf(metadataTag));
 
       // Different processing for key and value
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java	(revision 1470815)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java	(working copy)
@@ -54,6 +54,11 @@
 
   protected transient HashMapWrapper[] mapJoinTables;
 
+  protected static MapJoinMetaData metadata = new MapJoinMetaData();
+  public static MapJoinMetaData getMetadata() {
+    return metadata;
+  }
+
   private static final transient String[] FATAL_ERR_MSG = {
       null, // counter value 0 means no error
       "Mapside join exceeds available memory. "
@@ -117,7 +122,7 @@
       SerDe keySerializer = (SerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(),
           null);
       keySerializer.initialize(null, keyTableDesc.getProperties());
-      MapJoinMetaData.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx(
+      metadata.put(Integer.valueOf(metadataKeyTag), new HashTableSinkObjectCtx(
           ObjectInspectorUtils.getStandardObjectInspector(keySerializer.getObjectInspector(),
           ObjectInspectorCopyOption.WRITABLE), keySerializer, keyTableDesc, false, hconf));
 
@@ -136,7 +141,7 @@
         valueSerDe.initialize(null, valueTableDesc.getProperties());
 
         ObjectInspector inspector = valueSerDe.getObjectInspector();
-        MapJoinMetaData.put(Integer.valueOf(pos), new HashTableSinkObjectCtx(ObjectInspectorUtils
+        metadata.put(Integer.valueOf(pos), new HashTableSinkObjectCtx(ObjectInspectorUtils
            .getStandardObjectInspector(inspector, ObjectInspectorCopyOption.WRITABLE), valueSerDe,
            valueTableDesc, hasFilter(pos), hconf));
       }
@@ -189,8 +194,8 @@
         hashtable.initilizePersistentHash(path.toUri().getPath());
       }
     } catch (Exception e) {
-      LOG.error("Load Distributed Cache Error");
-      throw new HiveException(e.getMessage());
+      LOG.error("Load Distributed Cache Error", e);
+      throw new HiveException(e);
     }
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinMetaData.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinMetaData.java	(revision 1470815)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinMetaData.java	(working copy)
@@ -24,20 +24,21 @@
 import org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.HashTableSinkObjectCtx;
 
 public class MapJoinMetaData {
-  static transient Map<Integer, HashTableSinkObjectCtx> mapMetadata = new HashMap<Integer, HashTableSinkObjectCtx>();
+  transient Map<Integer, HashTableSinkObjectCtx> mapMetadata =
+      new HashMap<Integer, HashTableSinkObjectCtx>();
   static ArrayList<Object> list = new ArrayList<Object>();
 
   public MapJoinMetaData(){
   }
 
-  public static void put(Integer key, HashTableSinkObjectCtx value){
+  public void put(Integer key, HashTableSinkObjectCtx value){
     mapMetadata.put(key, value);
   }
 
-  public static HashTableSinkObjectCtx get(Integer key){
+  public HashTableSinkObjectCtx get(Integer key){
     return mapMetadata.get(key);
   }
 
-  public static void clear(){
+  public void clear(){
     mapMetadata.clear();
   }
 
@@ -45,5 +46,4 @@
     list.clear();
     return list;
   }
-
 }