diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index a1fb874..a841d72 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -4417,6 +4417,10 @@ private int createView(Hive db, CreateViewDesc crtView) throws HiveException {
       HiveMaterializedViewsRegistry.get().addMaterializedView(tbl);
     }
     addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
+
+    //set lineage info
+    DataContainer dc = new DataContainer(tbl.getTTable());
+    SessionState.get().getLineageState().setLineage(new Path(crtView.getViewName()), dc, tbl.getCols());
   }
   return 0;
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index f275f6a..ab1fd0d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -236,6 +236,8 @@
 import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.security.UserGroupInformation;
 
+import com.google.common.base.Splitter;
+import com.google.common.base.Strings;
 import com.google.common.collect.Sets;
 import com.google.common.math.IntMath;
 
@@ -7141,7 +7143,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input)
 
     if (ltd != null && SessionState.get() != null) {
       SessionState.get().getLineageState()
-          .mapDirToFop(ltd.getSourcePath(), (FileSinkOperator) output);
+          .mapDirToOp(ltd.getSourcePath(), (FileSinkOperator) output);
     } else if ( queryState.getCommandType().equals(HiveOperation.CREATETABLE_AS_SELECT.getOperationName())) {
 
       Path tlocation = null;
@@ -7154,7 +7156,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input)
       }
 
       SessionState.get().getLineageState()
-          .mapDirToFop(tlocation, (FileSinkOperator) output);
+          .mapDirToOp(tlocation, (FileSinkOperator) output);
     }
 
     if (LOG.isDebugEnabled()) {
@@ -11041,14 +11043,22 @@ void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticExce
 
       // Generate lineage info for create view statements
      // if LineageLogger hook is configured.
-      if (HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS).contains(
-          "org.apache.hadoop.hive.ql.hooks.LineageLogger")) {
+      // Add the transformation that computes the lineage information.
+      Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults()
+          .omitEmptyStrings()
+          .split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
+      if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter")
+          || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger")
+          || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
         ArrayList<Transform> transformations = new ArrayList<Transform>();
         transformations.add(new HiveOpConverterPostProc());
         transformations.add(new Generator());
         for (Transform t : transformations) {
           pCtx = t.transform(pCtx);
         }
+        // we just use view name as location.
+        SessionState.get().getLineageState()
+            .mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
       }
       return;
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java
index 223f0ea..0f95063 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java
@@ -26,6 +26,7 @@
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
 import org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index;
@@ -38,12 +39,12 @@
 
   /**
-   * Mapping from the directory name to FileSinkOperator. This
+   * Mapping from the directory name to FileSinkOperator (may not be FileSinkOperator for views). This
    * mapping is generated at the filesink operator creation
    * time and is then later used to created the mapping from
    * movetask to the set of filesink operators.
    */
-  private final Map<Path, FileSinkOperator> dirToFop;
+  private final Map<Path, Operator> dirToFop;
 
   /**
    * The lineage context index for this query.
    */
@@ -60,7 +61,7 @@
    * Constructor.
    */
   public LineageState() {
-    dirToFop = new HashMap<Path, FileSinkOperator>();
+    dirToFop = new HashMap<Path, Operator>();
     linfo = new LineageInfo();
     index = new Index();
   }
@@ -69,9 +70,9 @@ public LineageState() {
   * Adds a mapping from the load work to the file sink operator.
    *
    * @param dir The directory name.
-   * @param fop The file sink operator.
+   * @param fop The sink operator.
    */
-  public void mapDirToFop(Path dir, FileSinkOperator fop) {
+  public void mapDirToOp(Path dir, Operator fop) {
     dirToFop.put(dir, fop);
   }
 
@@ -85,18 +86,18 @@ public void mapDirToFop(Path dir, FileSinkOperator fop) {
    */
  public void setLineage(Path dir, DataContainer dc,
       List<FieldSchema> cols) {
     // First lookup the file sink operator from the load work.
-    FileSinkOperator fop = dirToFop.get(dir);
+    Operator op = dirToFop.get(dir);
 
     // Go over the associated fields and look up the dependencies
     // by position in the row schema of the filesink operator.
-    if (fop == null) {
+    if (op == null) {
       return;
     }
-    List<ColumnInfo> signature = fop.getSchema().getSignature();
+    List<ColumnInfo> signature = op.getSchema().getSignature();
     int i = 0;
     for (FieldSchema fs : cols) {
-      linfo.putDependency(dc, fs, index.getDependency(fop, signature.get(i++)));
+      linfo.putDependency(dc, fs, index.getDependency(op, signature.get(i++)));
     }
   }