Index: ql/src/test/results/clientpositive/explain_dependency.q.out =================================================================== --- ql/src/test/results/clientpositive/explain_dependency.q.out (revision 1422750) +++ ql/src/test/results/clientpositive/explain_dependency.q.out (working copy) @@ -54,7 +54,7 @@ EXPLAIN DEPENDENCY SELECT key, count(1) FROM srcpart WHERE ds IS NOT NULL GROUP BY key POSTHOOK: type: QUERY -{"input_partitions":["default@srcpart@ds=2008-04-08/hr=11","default@srcpart@ds=2008-04-08/hr=12","default@srcpart@ds=2008-04-09/hr=11","default@srcpart@ds=2008-04-09/hr=12"],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} +{"input_partitions":[{"partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} PREHOOK: query: EXPLAIN DEPENDENCY SELECT key, count(1) FROM (SELECT key, value FROM src) subq1 GROUP BY key PREHOOK: type: QUERY @@ -72,33 +72,33 @@ SELECT key, value FROM src UNION ALL SELECT key, value FROM srcpart WHERE ds IS NOT NULL ) S1 POSTHOOK: type: QUERY -{"input_partitions":["default@srcpart@ds=2008-04-08/hr=11","default@srcpart@ds=2008-04-08/hr=12","default@srcpart@ds=2008-04-09/hr=11","default@srcpart@ds=2008-04-09/hr=12"],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"},{"tablename":"default@src","tabletype":"MANAGED_TABLE"}]} +{"input_partitions":[{"partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"},{"tablename":"default@src","tabletype":"MANAGED_TABLE"}]} PREHOOK: query: EXPLAIN DEPENDENCY SELECT S1.key, S2.value FROM src S1 JOIN srcpart S2 ON S1.key = S2.key WHERE ds IS NOT NULL PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN DEPENDENCY SELECT S1.key, S2.value FROM src S1 JOIN srcpart S2 ON S1.key = S2.key WHERE ds IS NOT NULL POSTHOOK: type: QUERY -{"input_partitions":["default@srcpart@ds=2008-04-08/hr=11","default@srcpart@ds=2008-04-08/hr=12","default@srcpart@ds=2008-04-09/hr=11","default@srcpart@ds=2008-04-09/hr=12"],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"},{"tablename":"default@src","tabletype":"MANAGED_TABLE"}]} +{"input_partitions":[{"partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"},{"tablename":"default@src","tabletype":"MANAGED_TABLE"}]} PREHOOK: query: -- With views EXPLAIN DEPENDENCY SELECT * FROM V1 PREHOOK: type: QUERY POSTHOOK: query: -- With views EXPLAIN DEPENDENCY SELECT * FROM V1 POSTHOOK: type: QUERY -{"input_partitions":[],"input_tables":[{"tablename":"default@v1","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@src","tabletype":"MANAGED_TABLE"}]} +{"input_partitions":[],"input_tables":[{"tablename":"default@v1","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@src","tabletype":"MANAGED_TABLE","tableParents":"[default@v1]"}]} PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V2 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V2 POSTHOOK: type: QUERY -{"input_partitions":["default@srcpart@ds=2008-04-08/hr=11","default@srcpart@ds=2008-04-08/hr=12","default@srcpart@ds=2008-04-09/hr=11","default@srcpart@ds=2008-04-09/hr=12"],"input_tables":[{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} +{"input_partitions":[{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE","tableParents":"[default@v2]"}]} PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V3 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V3 POSTHOOK: type: QUERY -{"input_partitions":["default@srcpart@ds=2008-04-08/hr=11","default@srcpart@ds=2008-04-08/hr=12","default@srcpart@ds=2008-04-09/hr=11","default@srcpart@ds=2008-04-09/hr=12"],"input_tables":[{"tablename":"default@v3","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@src","tabletype":"MANAGED_TABLE"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} +{"input_partitions":[{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@v3","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v3]"},{"tablename":"default@src","tabletype":"MANAGED_TABLE","tableParents":"[default@v3]"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE","tableParents":"[default@v2]"}]} PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V4 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V4 POSTHOOK: type: QUERY -{"input_partitions":["default@srcpart@ds=2008-04-08/hr=11","default@srcpart@ds=2008-04-08/hr=12","default@srcpart@ds=2008-04-09/hr=11","default@srcpart@ds=2008-04-09/hr=12"],"input_tables":[{"tablename":"default@v4","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@v1","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@src","tabletype":"MANAGED_TABLE"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} +{"input_partitions":[{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@v4","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v4]"},{"tablename":"default@v1","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v4]"},{"tablename":"default@src","tabletype":"MANAGED_TABLE","tableParents":"[default@v4, default@v1]"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE","tableParents":"[default@v2]"}]} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1422750) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -601,6 +601,29 @@ setTaskPlan(alias_id, topOp, plan, local, opProcCtx, null); } + private static ReadEntity getParentViewInfo(String alias_id, + Map viewAliasToInput) { + String[] aliases = alias_id.split(":"); + + String currentAlias = null; + ReadEntity currentInput = null; + // Find the immediate parent possible. + // For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T + // -> implies depends on. + // T's parent would be V1 + for (int pos = 0; pos < aliases.length; pos++) { + currentAlias = currentAlias == null ? aliases[pos] : currentAlias + ":" + aliases[pos]; + ReadEntity input = viewAliasToInput.get(currentAlias); + if (input == null) { + return currentInput; + } + currentInput = input; + } + + return currentInput; + } + + /** * set the current task in the mapredWork. * @@ -703,11 +726,21 @@ boolean isFirstPart = true; boolean emptyInput = true; boolean singlePartition = (parts.size() == 1); + + // Track the dependencies for the view. Consider a query like: select * from V; + // where V is a view of the form: select * from T + // The dependencies should include V at depth 0, and T at depth 1 (inferred). + ReadEntity parentViewInfo = getParentViewInfo(alias_id, parseCtx.getViewAliasToInput()); + + // The table should also be considered a part of inputs, even if the table is a + // partitioned table and whether any partition is selected or not + PlanUtils.addInput(inputs, + new ReadEntity(parseCtx.getTopToTable().get(topOp), parentViewInfo)); for (Partition part : parts) { if (part.getTable().isPartitioned()) { - inputs.add(new ReadEntity(part)); + PlanUtils.addInput(inputs, new ReadEntity(part, parentViewInfo)); } else { - inputs.add(new ReadEntity(part.getTable())); + PlanUtils.addInput(inputs, new ReadEntity(part.getTable(), parentViewInfo)); } // Later the properties have to come from the partition as opposed Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (revision 1422750) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (working copy) @@ -78,34 +78,34 @@ JSONObject outJSONObject = new JSONObject(); List> inputTableInfo = new ArrayList>(); - Set inputPartitions = new HashSet(); - Set inputTables = new HashSet(); - Table table = null; + List> inputPartitionInfo = new ArrayList>(); for (ReadEntity input: work.getInputs()) { switch (input.getType()) { case TABLE: - table = input.getTable(); + Table table = input.getTable(); + Map tableInfo = new HashMap(); + tableInfo.put("tablename", table.getCompleteName()); + tableInfo.put("tabletype", table.getTableType().toString()); + if ((input.getParents() != null) && (!input.getParents().isEmpty())) { + tableInfo.put("tableParents", input.getParents().toString()); + } + inputTableInfo.add(tableInfo); break; case PARTITION: - inputPartitions.add(input.getPartition().getCompleteName()); - table = input.getPartition().getTable(); + Map partitionInfo = new HashMap(); + partitionInfo.put("partitionName", input.getPartition().getCompleteName()); + if ((input.getParents() != null) && (!input.getParents().isEmpty())) { + partitionInfo.put("partitionParents", input.getParents().toString()); + } + inputPartitionInfo.add(partitionInfo); break; default: - table = null; break; } - - if (table != null && !inputTables.contains(table.getCompleteName())) { - Map tableInfo = new HashMap(); - tableInfo.put("tablename", table.getCompleteName()); - tableInfo.put("tabletype", table.getTableType().toString()); - inputTableInfo.add(tableInfo); - inputTables.add(table.getCompleteName()); - } } outJSONObject.put("input_tables", inputTableInfo); - outJSONObject.put("input_partitions", inputPartitions); + outJSONObject.put("input_partitions", inputPartitionInfo); return outJSONObject; } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (revision 1422750) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (working copy) @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -35,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; @@ -758,4 +760,23 @@ // prevent instantiation } + public static ReadEntity addInput(Set inputs, ReadEntity newInput) { + // If the input is already present, make sure the new parent is added to the input. + if (inputs.contains(newInput)) { + for (ReadEntity input : inputs) { + if (input.equals(newInput)) { + if ((newInput.getParents() != null) && (!newInput.getParents().isEmpty())) { + input.getParents().addAll(newInput.getParents()); + } + return input; + } + } + assert false; + } else { + inputs.add(newInput); + return newInput; + } + // make compile happy + return null; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1422750) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -220,6 +220,8 @@ private final String autogenColAliasPrfxLbl; private final boolean autogenColAliasPrfxIncludeFuncName; + private final Map viewAliasToInput = new HashMap(); + //Max characters when auto generating the column name with func name private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20; @@ -302,7 +304,7 @@ loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, - opToPartToSkewedPruner); + opToPartToSkewedPruner, viewAliasToInput); } @SuppressWarnings("nls") @@ -941,16 +943,25 @@ } private void getMetaData(QBExpr qbexpr) throws SemanticException { + getMetaData(qbexpr, null); + } + + private void getMetaData(QBExpr qbexpr, ReadEntity parentInput) + throws SemanticException { if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) { - getMetaData(qbexpr.getQB()); + getMetaData(qbexpr.getQB(), parentInput); } else { - getMetaData(qbexpr.getQBExpr1()); - getMetaData(qbexpr.getQBExpr2()); + getMetaData(qbexpr.getQBExpr1(), parentInput); + getMetaData(qbexpr.getQBExpr2(), parentInput); } } + public void getMetaData(QB qb) throws SemanticException { + getMetaData(qb, null); + } + @SuppressWarnings("nls") - public void getMetaData(QB qb) throws SemanticException { + public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException { try { LOG.info("Get metadata for source tables"); @@ -959,7 +970,8 @@ // We have to materialize the table alias list since we might // modify it in the middle for view rewrite. List tabAliases = new ArrayList(qb.getTabAliases()); - Map aliasToViewName = new HashMap(); + Map> aliasToViewInfo = + new HashMap>(); for (String alias : tabAliases) { String tab_name = qb.getTabNameForAlias(alias); Table tab = null; @@ -1001,10 +1013,12 @@ " -> " + fullViewName + ")."); } replaceViewReferenceWithDefinition(qb, tab, tab_name, alias); - aliasToViewName.put(alias, fullViewName); // This is the last time we'll see the Table objects for views, so add it to the inputs // now - inputs.add(new ReadEntity(tab)); + ReadEntity viewInput = new ReadEntity(tab, parentInput); + viewInput = PlanUtils.addInput(inputs, viewInput); + aliasToViewInfo.put(alias, new ObjectPair(fullViewName, viewInput)); + viewAliasToInput.put(getAliasId(alias, qb), viewInput); continue; } @@ -1033,12 +1047,14 @@ LOG.info("Get metadata for subqueries"); // Go over the subqueries and getMetaData for these for (String alias : qb.getSubqAliases()) { - boolean wasView = aliasToViewName.containsKey(alias); + boolean wasView = aliasToViewInfo.containsKey(alias); + ReadEntity newParentInput = null; if (wasView) { - viewsExpanded.add(aliasToViewName.get(alias)); + viewsExpanded.add(aliasToViewInfo.get(alias).getFirst()); + newParentInput = aliasToViewInfo.get(alias).getSecond(); } QBExpr qbexpr = qb.getSubqForAlias(alias); - getMetaData(qbexpr); + getMetaData(qbexpr, newParentInput); if (wasView) { viewsExpanded.remove(viewsExpanded.size()-1); } @@ -8214,7 +8230,7 @@ loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, - opToPartToSkewedPruner); + opToPartToSkewedPruner, viewAliasToInput); // Generate table access stats if required if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == true) { Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 1422750) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy) @@ -85,6 +85,7 @@ // reducer private Map> groupOpToInputTables; private Map prunedPartitions; + private Map viewAliasToInput; /** * The lineage information. @@ -169,7 +170,8 @@ GlobalLimitCtx globalLimitCtx, HashMap nameToSplitSample, HashSet semanticInputs, List> rootTasks, - Map> opToPartToSkewedPruner) { + Map> opToPartToSkewedPruner, + Map viewAliasToInput) { this.conf = conf; this.qb = qb; this.ast = ast; @@ -196,6 +198,7 @@ this.semanticInputs = semanticInputs; this.rootTasks = rootTasks; this.opToPartToSkewedPruner = opToPartToSkewedPruner; + this.viewAliasToInput = viewAliasToInput; } /** @@ -578,4 +581,7 @@ this.opToPartToSkewedPruner = opToPartToSkewedPruner; } + public Map getViewAliasToInput() { + return viewAliasToInput; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java (revision 1422750) +++ ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java (working copy) @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.hooks; import java.io.Serializable; +import java.util.HashSet; +import java.util.Set; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; @@ -29,6 +31,13 @@ */ public class ReadEntity extends Entity implements Serializable { + // Consider a query like: select * from V, where the view V is defined as: + // select * from T + // The inputs will contain V and T (parent: V) + + // For views, the entities can be nested - by default, entities are at the top level + private Set parents = null; + /** * For serialization only. */ @@ -46,8 +55,20 @@ super(t); } + private void initParent(ReadEntity parent) { + if (parent != null) { + this.parents = new HashSet(); + this.parents.add(parent); + } + } + + public ReadEntity(Table t, ReadEntity parent) { + super(t); + initParent(parent); + } + /** - * Constructor given a partiton. + * Constructor given a partition. * * @param p * The partition that the query reads from. @@ -56,6 +77,15 @@ super(p); } + public ReadEntity(Partition p, ReadEntity parent) { + super(p); + initParent(parent); + } + + public Set getParents() { + return parents; + } + /** * Equals function. */