Index: src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java =================================================================== --- src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java (revision 956763) +++ src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java (working copy) @@ -41,6 +41,8 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; /** * Expression factory for predicate pushdown processing. Each processor @@ -67,6 +69,17 @@ Operator op = ctx.getOp(); String[] colAlias = toRR.reverseLookup(colref.getColumn()); + + // prevent replicated resolve for table alias at join + // this occurs only when a SelectOperator is followed by a CommonJoinOperator + // immediately. + if (op instanceof SelectOperator && + op.getParentOperators().get(0) instanceof CommonJoinOperator && + colref.getTabAlias() != null) { + ctx.setIsCandidate(colref, true); + return true; + } + if (op.getColumnExprMap() != null) { // replace the output expression with the input expression so that // parent op can understand this expression Index: src/test/queries/clientpositive/ppd_same_alias.q =================================================================== --- src/test/queries/clientpositive/ppd_same_alias.q (revision 0) +++ src/test/queries/clientpositive/ppd_same_alias.q (revision 0) @@ -0,0 +1,18 @@ +set hive.optimize.ppd=true; + +explain select key1, key2, assoc_idx +from ( + select key1, key2, count(distinct value) as assoc_idx + from ( + select t1.key as key1, t2.key as key2, t1.value + from ( + select key, value + from src + group by key, value) t1 + join ( + select key, value + from src + group by key, value) t2 + on t1.value=t2.value) t1 + group by key1, key2) t1 +where key1 <> key2 and assoc_idx >2; Index: 
src/test/results/clientpositive/ppd_same_alias.q.out =================================================================== --- src/test/results/clientpositive/ppd_same_alias.q.out (revision 0) +++ src/test/results/clientpositive/ppd_same_alias.q.out (revision 0) @@ -0,0 +1,309 @@ +PREHOOK: query: explain select key1, key2, assoc_idx +from ( + select key1, key2, count(distinct value) as assoc_idx + from ( + select t1.key as key1, t2.key as key2, t1.value + from ( + select key, value + from src + group by key, value) t1 + join ( + select key, value + from src + group by key, value) t2 + on t1.value=t2.value) t1 + group by key1, key2) t1 +where key1 <> key2 and assoc_idx >2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key1, key2, assoc_idx +from ( + select key1, key2, count(distinct value) as assoc_idx + from ( + select t1.key as key1, t2.key as key2, t1.value + from ( + select key, value + from src + group by key, value) t1 + join ( + select key, value + from src + group by key, value) t2 + on t1.value=t2.value) t1 + group by key1, key2) t1 +where key1 <> key2 and assoc_idx >2 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t2) (= (. (TOK_TABLE_OR_COL t1) value) (. (TOK_TABLE_OR_COL t2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key) key1) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL t2) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) value))))) t1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key1)) (TOK_SELEXPR (TOK_TABLE_OR_COL key2)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL value)) assoc_idx)) (TOK_GROUPBY (TOK_TABLE_OR_COL key1) (TOK_TABLE_OR_COL key2)))) t1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key1)) (TOK_SELEXPR (TOK_TABLE_OR_COL key2)) (TOK_SELEXPR (TOK_TABLE_OR_COL assoc_idx))) (TOK_WHERE (and (<> (TOK_TABLE_OR_COL key1) (TOK_TABLE_OR_COL key2)) (> (TOK_TABLE_OR_COL assoc_idx) 2))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1:t1:t1:src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: 
Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + $INTNAME1 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: 1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col2 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + Group By Operator + aggregations: + expr: count(DISTINCT _col2) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/tmp/shaojie/hive_2010-06-22_01-23-11_480_7624347678500668090/10003 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + 
aggregations: + expr: count(DISTINCT KEY._col2) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: + expr: ((_col0 <> _col1) and (_col2 > 2)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + t1:t1:t2:src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + +