diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index 8f2804f..a62565f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -130,7 +130,12 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem } - TableDesc valueTableDesc = conf.getValueTblDescs().get(tag); + TableDesc valueTableDesc; + if (conf.getNoOuterJoin()) { + valueTableDesc = conf.getValueTblDescs().get(tag); + } else { + valueTableDesc = conf.getValueFilteredTblDescs().get(tag); + } SerDe valueSerDe = (SerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null); valueSerDe.initialize(null, valueTableDesc.getProperties()); diff --git ql/src/test/queries/clientpositive/mapjoin_filter_on_outerjoin.q ql/src/test/queries/clientpositive/mapjoin_filter_on_outerjoin.q new file mode 100644 index 0000000..0acd2fc --- /dev/null +++ ql/src/test/queries/clientpositive/mapjoin_filter_on_outerjoin.q @@ -0,0 +1,18 @@ +--HIVE-2101 mapjoin sometimes gives wrong results if there is a filter in the on condition + +explain +SELECT /*+ mapjoin(src1, src2) */ * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) + SORT BY src1.key, src2.key, src3.key; + +SELECT /*+ mapjoin(src1, src2) */ * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) + SORT BY src1.key, src2.key, src3.key; + +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) + SORT BY src1.key, src2.key, src3.key; + diff --git ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out new file mode 100644 index 0000000..553286e --- /dev/null +++ ql/src/test/results/clientpositive/mapjoin_filter_on_outerjoin.q.out @@ -0,0 +1,330 @@ +PREHOOK: query: --HIVE-2101 mapjoin sometimes gives wrong results if there is a filter in the on condition + +explain +SELECT /*+ mapjoin(src1, src2) */ * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) + SORT BY src1.key, src2.key, src3.key +PREHOOK: type: QUERY +POSTHOOK: query: --HIVE-2101 mapjoin sometimes gives wrong results if there is a filter in the on condition + +explain +SELECT /*+ mapjoin(src1, src2) */ * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) + SORT BY src1.key, src2.key, src3.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_RIGHTOUTERJOIN (TOK_TABREF (TOK_TABNAME src) src1) (TOK_TABREF (TOK_TABNAME src) src2) (AND (AND (= (. (TOK_TABLE_OR_COL src1) key) (. (TOK_TABLE_OR_COL src2) key)) (< (. (TOK_TABLE_OR_COL src1) key) 10)) (> (. (TOK_TABLE_OR_COL src2) key) 10))) (TOK_TABREF (TOK_TABNAME src) src3) (AND (= (. (TOK_TABLE_OR_COL src2) key) (. (TOK_TABLE_OR_COL src3) key)) (< (. (TOK_TABLE_OR_COL src3) key) 10)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src1 src2))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src1) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src2) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL src3) key))))) + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-1 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + src1 + Fetch Operator + limit: -1 + src2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1 + TableScan + alias: src1 + Filter Operator + predicate: + expr: (key < 10.0) + type: boolean + HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10.0)} + 2 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + Position of Big Table: 2 + src2 + TableScan + alias: src2 + HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10.0)} + 2 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + Position of Big Table: 2 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src3 + TableScan + alias: src3 + Filter Operator + predicate: + expr: (key < 10.0) + type: boolean + Map Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10.0)} + 2 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Position of Big Table: 2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: string + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col2 + type: string + expr: _col4 + type: string + sort order: +++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) + SORT BY src1.key, src2.key, src3.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) + SORT BY src1.key, src2.key, src3.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 2 val_2 2 val_2 +NULL NULL 4 val_4 4 val_4 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 8 val_8 8 val_8 +NULL NULL 9 val_9 9 val_9 +PREHOOK: query: SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) + SORT BY src1.key, src2.key, src3.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) + SORT BY src1.key, src2.key, src3.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 0 val_0 0 val_0 +NULL NULL 2 val_2 2 val_2 +NULL NULL 4 val_4 4 val_4 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 5 val_5 5 val_5 +NULL NULL 8 val_8 8 val_8 +NULL NULL 9 val_9 9 val_9