Index: ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (revision 1069164) +++ ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (working copy) @@ -121,11 +121,9 @@ if ((nd.getChildren() == null) || getDispatchedList().containsAll(nd.getChildren())) { // all children are done or no need to walk the children - if (getDispatchedList().contains(nd)) { - // sanity check - assert false; + if (!getDispatchedList().contains(nd)) { + dispatch(nd, opStack); } - dispatch(nd, opStack); opStack.pop(); return; } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 1069164) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -129,16 +129,11 @@ .entrySet()) { String alias = entry.getKey(); Operator op = entry.getValue(); - // get table scan op - if (!(op instanceof TableScanOperator)) { - throw new SemanticException("top op is not table scan"); - } - TableScanOperator tableScanOp = (TableScanOperator) op; // if the table scan is for big table; then skip it // tracing down the operator tree from the table scan operator - Operator parentOp = tableScanOp; - Operator childOp = tableScanOp.getChildOperators().get(0); + Operator parentOp = op; + Operator childOp = op.getChildOperators().get(0); while ((childOp != null) && (!childOp.equals(mapJoinOp))) { parentOp = childOp; assert parentOp.getChildOperators().size() == 1; @@ -155,7 +150,7 @@ continue; } // set alias to work and put into smallTableAliasList - newLocalWork.getAliasToWork().put(alias, tableScanOp); + newLocalWork.getAliasToWork().put(alias, op); smallTableAliasList.add(alias); // get input path and remove this alias from pathToAlias // because this file will be fetched by fetch operator Index: ql/src/test/queries/clientpositive/auto_join27.q =================================================================== --- ql/src/test/queries/clientpositive/auto_join27.q (revision 0) +++ ql/src/test/queries/clientpositive/auto_join27.q (revision 0) @@ -0,0 +1,25 @@ +set hive.auto.convert.join = true; + +explain +SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200; + + +SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200; Index: ql/src/test/results/clientpositive/auto_join27.q.out =================================================================== --- ql/src/test/results/clientpositive/auto_join27.q.out (revision 0) +++ ql/src/test/results/clientpositive/auto_join27.q.out (revision 0) @@ -0,0 +1,425 @@ +PREHOOK: query: explain +SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value)))))) src_12) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) k) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) v)))) src3) (AND (= (. (TOK_TABLE_OR_COL src_12) key) (. (TOK_TABLE_OR_COL src3) k)) (< (. (TOK_TABLE_OR_COL src3) k) 200)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-9, Stage-10, Stage-2 + Stage-9 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-10 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-10 + Stage-2 + Stage-4 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery2:src_12-subquery2:src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + src3:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src3:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < 200) + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 < 200) + type: boolean + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 0 + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-12_648_7994814367228902299/-mr-10002 + Union + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-12_648_7994814367228902299/-mr-10004 + Union + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 0 + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-12_648_7994814367228902299/-mr-10003 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-12_648_7994814367228902299/-mr-10002 + Fetch Operator + limit: -1 + file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-12_648_7994814367228902299/-mr-10004 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-12_648_7994814367228902299/-mr-10002 + Union + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-12_648_7994814367228902299/-mr-10004 + Union + HashTable Sink Operator + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + src3:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < 200) + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 < 200) + type: boolean + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[_col0]] + Position of Big Table: 1 + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-12_648_7994814367228902299/-mr-10002 + Union + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-12_648_7994814367228902299/-mr-10004 + Union + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + src3:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < 200) + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Filter Operator + predicate: + expr: (_col0 < 200) + type: boolean + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + handleSkewJoin: false + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:src_12-subquery1:src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-14_351_5133935939271841140/-mr-10000 +POSTHOOK: query: SELECT count(1) +FROM +( +SELECT src.key, src.value from src +UNION ALL +SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +(SELECT src.key as k, src.value as v from src) src3 +ON src_12.key = src3.k AND src3.k < 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-02-09_15-41-14_351_5133935939271841140/-mr-10000 +548