Index: ql/src/test/results/clientpositive/lineage1.q.out
===================================================================
--- ql/src/test/results/clientpositive/lineage1.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/lineage1.q.out	(revision 0)
@@ -0,0 +1,290 @@
+PREHOOK: query: drop table dest_l1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table dest_l1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_l1
+PREHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+      FROM src1 t1
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+      FROM src1 t1
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF src1 t1) (TOK_TABREF src p1) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL p1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL p1) value))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF src1 t2) (TOK_TABREF src p2) (= (. (TOK_TABLE_OR_COL t2) key) (. (TOK_TABLE_OR_COL p2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t2) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL p2) value)))))) j)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_l1)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF j)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-6
+  Stage-5 depends on stages: Stage-2 , consists of Stage-4, Stage-3
+  Stage-4
+  Stage-0 depends on stages: Stage-4, Stage-3
+  Stage-3
+  Stage-6 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:j-subquery1:p1 
+          TableScan
+            alias: p1
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 1
+              value expressions:
+                    expr: value
+                    type: string
+        null-subquery1:j-subquery1:t1 
+          TableScan
+            alias: t1
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: key
+                    type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          condition expressions:
+            0 {VALUE._col0}
+            1 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col3
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col3
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10002 
+          Union
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: UDFToInteger(_col0)
+                      type: int
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: dest_l1
+        file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10004 
+          Union
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: UDFToInteger(_col0)
+                      type: int
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: dest_l1
+
+  Stage: Stage-5
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10000
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: dest_l1
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/data/users/athusoo/apache_workspaces/hive_trunk_ws1/build/ql/scratchdir/hive_2010-04-22_15-52-46_260_8748362802646516479/10003 
+            Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns:
+                    expr: rand()
+                    type: double
+              tag: -1
+              value expressions:
+                    expr: key
+                    type: int
+                    expr: value
+                    type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: dest_l1
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:j-subquery2:p2 
+          TableScan
+            alias: p2
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 1
+              value expressions:
+                    expr: value
+                    type: string
+        null-subquery2:j-subquery2:t2 
+          TableScan
+            alias: t2
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: key
+                    type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          condition expressions:
+            0 {VALUE._col0}
+            1 {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col3
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col3
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+      FROM src1 t1
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_l1
+POSTHOOK: query: INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+      FROM src1 t1
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_l1
+POSTHOOK: Lineage: dest_l1.key EXPRESSION [(src1)t1.FieldSchema(name:key, type:string, comment:default), (src1)t2.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_l1.value EXPRESSION [(src)p2.FieldSchema(name:value, type:string, comment:default), (src)p1.FieldSchema(name:value, type:string, comment:default), ]
Index: ql/src/test/queries/clientpositive/lineage1.q
===================================================================
--- ql/src/test/queries/clientpositive/lineage1.q	(revision 0)
+++ ql/src/test/queries/clientpositive/lineage1.q	(revision 0)
@@ -0,0 +1,30 @@
+drop table dest_l1;
+
+CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE;
+
+
+EXPLAIN
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+      FROM src1 t1
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j;
+
+INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+      FROM src1 t1
+      LEFT OUTER JOIN src p1
+      ON (t1.key = p1.key)
+      UNION ALL
+      SELECT t2.key, p2.value
+      FROM src1 t2
+      LEFT OUTER JOIN src p2
+      ON (t2.key = p2.key)) j;
+
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java	(revision 937077)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java	(working copy)
@@ -393,8 +393,10 @@
       ArrayList<ColumnInfo> inp_cols = inpOp.getSchema().getSignature();
       int cnt = 0;
       for(ColumnInfo ci : rs.getSignature()) {
-        lCtx.getIndex().mergeDependency(op, ci,
-          lCtx.getIndex().getDependency(inpOp, inp_cols.get(cnt++)));
+        Dependency inp_dep = lCtx.getIndex().getDependency(inpOp, inp_cols.get(cnt++));
+        if (inp_dep != null) {
+          lCtx.getIndex().mergeDependency(op, ci, inp_dep);
+        }
       }
       return null;
     }
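
Note on the OpProcFactory.java hunk: the lineage index may have no dependency recorded for an input column of the union, in which case getDependency returns null; the old code passed that null straight into mergeDependency, presumably failing with a NullPointerException on UNION ALL queries like the one in lineage1.q. The stand-alone sketch below reproduces the failure mode in miniature. It is a simplified model, not the Hive classes: LineageIndex and its map-backed storage are hypothetical stand-ins for LineageCtx.Index and Dependency, and only the getDependency/mergeDependency call shape is taken from the patch.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Hypothetical stand-in for LineageCtx.Index: maps an output column name
// to the list of base-table columns it was derived from.
class LineageIndex {
  private final Map<String, List<String>> deps = new HashMap<>();

  // Returns null when no lineage has been recorded for the column --
  // exactly the case the patched code now has to tolerate.
  List<String> getDependency(String column) {
    return deps.get(column);
  }

  // Merges the base columns of 'incoming' into the entry for 'column'.
  // Dereferences 'incoming', so a null argument would throw a
  // NullPointerException, mirroring the unpatched behavior.
  void mergeDependency(String column, List<String> incoming) {
    deps.computeIfAbsent(column, k -> new ArrayList<>()).addAll(incoming);
  }
}

public class UnionLineageSketch {
  public static void main(String[] args) {
    LineageIndex index = new LineageIndex();
    index.mergeDependency("key", List.of("src1.key"));
    // No dependency was ever recorded for "value" on this input branch.

    for (String col : new String[] {"key", "value"}) {
      List<String> inpDep = index.getDependency(col);
      // The guard added by the patch: skip columns with no recorded
      // lineage instead of passing null into mergeDependency.
      if (inpDep != null) {
        index.mergeDependency("union_out." + col, inpDep);
      }
    }
    System.out.println("merged union lineage without NPE");
  }
}

An alternative fix would be to record an empty dependency instead of skipping the merge; the patch keeps the simpler skip, and the new lineage1.q golden file pins down the resulting EXPRESSION lineage for both columns of dest_l1.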