diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 698efdc..2671cb1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -8922,6 +8922,10 @@ private void mergeJoins(QB qb, QBJoinTree node, QBJoinTree target, int pos, int[ private ObjectPair findMergePos(QBJoinTree node, QBJoinTree target) { int res = -1; String leftAlias = node.getLeftAlias(); + if (leftAlias == null && (!node.getNoOuterJoin() || !target.getNoOuterJoin())) { + // Cross with outer join: currently we do not merge + return new ObjectPair(-1, null); + } ArrayList nodeCondn = node.getExpressions().get(0); ArrayList targetCondn = null; diff --git ql/src/test/queries/clientpositive/cross_join_merge.q ql/src/test/queries/clientpositive/cross_join_merge.q index 3ba4727..50f5813 100644 --- ql/src/test/queries/clientpositive/cross_join_merge.q +++ ql/src/test/queries/clientpositive/cross_join_merge.q @@ -10,6 +10,7 @@ select src1.key from src src1 join src src2 join src src3 where src1.key=src2.ke explain select src1.key from src src1 join src src2 on 5 = src2.key join src src3 on src1.key=src3.key; +-- no merge explain select src1.key from src src1 left outer join src src2 join src src3; diff --git ql/src/test/results/clientpositive/cross_join_merge.q.out ql/src/test/results/clientpositive/cross_join_merge.q.out index ccf2ff6..f15161a 100644 --- ql/src/test/results/clientpositive/cross_join_merge.q.out +++ ql/src/test/results/clientpositive/cross_join_merge.q.out @@ -337,16 +337,20 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: -- no merge +explain select src1.key from src src1 left outer join src src2 join src src3 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: -- no merge +explain select src1.key from src src1 left outer join src src2 join src src3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -371,6 +375,30 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 250000 Data size: 3906000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250000 Data size: 3906000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -382,17 +410,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 - Inner Join 0 to 2 + Inner Join 0 to 1 keys: 0 1 - 2 outputColumnNames: _col0 - Statistics: Num rows: 125000000 Data size: 2453000000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125000000 Data size: 2578000000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 125000000 Data size: 2453000000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125000000 Data size: 2578000000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat