Index: ql/src/test/results/clientpositive/ppd_outer_join4.q.out =================================================================== --- ql/src/test/results/clientpositive/ppd_outer_join4.q.out (revision 1163875) +++ ql/src/test/results/clientpositive/ppd_outer_join4.q.out (working copy) @@ -70,18 +70,22 @@ c TableScan alias: c - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string + Filter Operator + predicate: + expr: (sqrt(key) <> 13) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 2 + value expressions: + expr: key + type: string Reduce Operator Tree: Join Operator condition map: @@ -134,7 +138,7 @@ WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-03-22_02-38-11_041_8830294243573092446/-mr-10000 +PREHOOK: Output: file:/var/folders/nt/ng21tg0n1jl4547lw0k8lg6hq_nw87/T/charleschen/hive_2011-08-31_17-04-03_652_7389043450109466394/-mr-10000 POSTHOOK: query: FROM src a LEFT OUTER JOIN @@ -147,7 +151,7 @@ WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-03-22_02-38-11_041_8830294243573092446/-mr-10000 +POSTHOOK: Output: file:/var/folders/nt/ng21tg0n1jl4547lw0k8lg6hq_nw87/T/charleschen/hive_2011-08-31_17-04-03_652_7389043450109466394/-mr-10000 150 val_150 150 val_150 150 152 val_152 152 val_152 152 152 val_152 152 val_152 152 @@ -450,18 +454,22 @@ c TableScan alias: c - Reduce Output Operator - key expressions: - expr: key - type: string - sort order: + - Map-reduce partition columns: - expr: key - type: string - tag: 2 - value expressions: - expr: key - type: string + Filter Operator + predicate: + expr: (sqrt(key) <> 13) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 2 + value expressions: + expr: key + type: string Reduce Operator Tree: Join Operator condition map: @@ -475,7 +483,7 @@ outputColumnNames: _col0, _col1, _col4, _col5, _col8 Filter Operator predicate: - expr: ((((_col4 > '15') and (_col4 < '25')) and (sqrt(_col8) <> 13)) and ((_col0 > '10') and (_col0 < '20'))) + expr: (((_col4 > '15') and (_col4 < '25')) and ((_col0 > '10') and (_col0 < '20'))) type: boolean Select Operator expressions: @@ -514,7 +522,7 @@ WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-03-22_02-38-20_602_416360783321217123/-mr-10000 +PREHOOK: Output: file:/var/folders/nt/ng21tg0n1jl4547lw0k8lg6hq_nw87/T/charleschen/hive_2011-08-31_17-04-10_850_6246519718607931090/-mr-10000 POSTHOOK: query: FROM src a LEFT OUTER JOIN @@ -527,7 +535,7 @@ WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/uc/ucuNeMAVGQGzy3459D8z2+++Z0Q/-Tmp-/amarsri/hive_2011-03-22_02-38-20_602_416360783321217123/-mr-10000 +POSTHOOK: Output: file:/var/folders/nt/ng21tg0n1jl4547lw0k8lg6hq_nw87/T/charleschen/hive_2011-08-31_17-04-10_850_6246519718607931090/-mr-10000 150 val_150 150 val_150 150 152 val_152 152 val_152 152 152 val_152 152 val_152 152 Index: ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (revision 1163875) +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (working copy) @@ -270,19 +270,22 @@ /** * Figures out the aliases for whom it is safe to push predicates based on - * ANSI SQL semantics For inner join, all predicates for all aliases can be - * pushed For full outer join, none of the predicates can be pushed as that - * would limit the number of rows for join For left outer join, all the - * predicates on the left side aliases can be pushed up For right outer - * join, all the predicates on the right side aliases can be pushed up Joins - * chain containing both left and right outer joins are treated as full - * outer join. TODO: further optimization opportunity for the case a.c1 = - * b.c1 and b.c2 = c.c2 a and b are first joined and then the result with c. - * But the second join op currently treats a and b as separate aliases and - * thus disallowing predicate expr containing both tables a and b (such as - * a.c3 + a.c4 > 20). Such predicates also can be pushed just above the - * second join and below the first join + * ANSI SQL semantics. The join conditions are left associative so "a + * RIGHT OUTER JOIN b LEFT OUTER JOIN cÊINNER JOIN d" is interpreted as + * "((a RIGHT OUTER JOIN b) LEFT OUTER JOIN c) INNER JOIN d". For inner + * joins, both the left and right join subexpressions are considered for + * pushing down aliases, for the right outer join, the right subexpression + * is considered and the left ignored and for the left outer join, the + * left subexpression is considered and the left ignored. Here, aliases b + * and d are eligible to be pushed up. * + * TODO: further optimization opportunity for the case a.c1 = b.c1 and b.c2 + * = c.c2 a and b are first joined and then the result with c. But the + * second join op currently treats a and b as separate aliases and thus + * disallowing predicate expr containing both tables a and b (such as a.c3 + * + a.c4 > 20). Such predicates also can be pushed just above the second + * join and below the first join + * * @param op * Join Operator * @param rr @@ -291,41 +294,24 @@ */ private Set getQualifiedAliases(JoinOperator op, RowResolver rr) { Set aliases = new HashSet(); - int loj = Integer.MAX_VALUE; - int roj = -1; - boolean oj = false; JoinCondDesc[] conds = op.getConf().getConds(); Map> posToAliasMap = op.getPosToAliasMap(); - for (JoinCondDesc jc : conds) { - if (jc.getType() == JoinDesc.FULL_OUTER_JOIN) { - oj = true; + int i; + for (i=conds.length-1; i>=0; i--){ + if (conds[i].getType() == JoinDesc.INNER_JOIN) { + aliases.addAll(posToAliasMap.get(i+1)); + } else if (conds[i].getType() == JoinDesc.FULL_OUTER_JOIN) { break; - } else if (jc.getType() == JoinDesc.LEFT_OUTER_JOIN) { - if (jc.getLeft() < loj) { - loj = jc.getLeft(); - } - } else if (jc.getType() == JoinDesc.RIGHT_OUTER_JOIN) { - if (jc.getRight() > roj) { - roj = jc.getRight(); - } + } else if (conds[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) { + aliases.addAll(posToAliasMap.get(i+1)); + break; + } else if (conds[i].getType() == JoinDesc.LEFT_OUTER_JOIN) { + continue; } } - if (oj || (loj != Integer.MAX_VALUE && roj != -1)) { - return aliases; + if(i == -1){ + aliases.addAll(posToAliasMap.get(0)); } - for (Entry> pa : posToAliasMap.entrySet()) { - if (loj != Integer.MAX_VALUE) { - if (pa.getKey() <= loj) { - aliases.addAll(pa.getValue()); - } - } else if (roj != -1) { - if (pa.getKey() >= roj) { - aliases.addAll(pa.getValue()); - } - } else { - aliases.addAll(pa.getValue()); - } - } Set aliases2 = rr.getTableNames(); aliases.retainAll(aliases2); return aliases;