diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
index c4654d982e..d5fc8b8a55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
@@ -48,6 +48,7 @@
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
 import org.apache.hadoop.hive.ql.plan.DDLDesc;
 import org.apache.hadoop.hive.ql.plan.DDLDesc.DDLDescWithWriteId;
@@ -58,6 +59,7 @@
 import org.apache.hadoop.hive.ql.plan.api.AdjacencyType;
 import org.apache.hadoop.hive.ql.plan.api.NodeType;
 import org.apache.hadoop.hive.ql.plan.api.TaskType;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.thrift.TException;
 import org.apache.thrift.protocol.TBinaryProtocol;
 import org.apache.thrift.protocol.TJSONProtocol;
@@ -118,6 +120,11 @@
   private final DDLDesc.DDLDescWithWriteId acidDdlDesc;
   private Boolean autoCommitValue;
 
+  /**
+   * inputPartitionList is exposed to hooks for analysis. There is no need to serialize it.
+   */
+  private transient List<String> inputPartitionList;
+
   public QueryPlan() {
     this(null);
   }
@@ -155,6 +162,15 @@ public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, S
     this.operation = operation;
     this.autoCommitValue = sem.getAutoCommitValue();
     this.resultSchema = resultSchema;
+
+    // getInputPartitionList() is declared on SemanticAnalyzer; any other analyzer yields an empty list.
+    if (sem instanceof SemanticAnalyzer) {
+      SemanticAnalyzer sa = (SemanticAnalyzer) sem;
+      inputPartitionList = sa.getInputPartitionList();
+    } else {
+      inputPartitionList = Collections.emptyList();
+    }
+
     // TODO: all this ACID stuff should be in some sub-object
     this.acidResourcesInQuery = sem.hasTransactionalInQuery();
     this.acidSinks = sem.getAcidFileSinks();
@@ -162,6 +178,16 @@ public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, S
     this.acidAnalyzeTable = sem.getAcidAnalyzeTable();
   }
 
+  /**
+   * The list of input partition names (after pruning). Useful for analysis.
+   * Format is the same as partition full name: "database@table@partkey=partvalue".
+   *
+   * @return the input partition names recorded when this plan was built from the analyzer
+   */
+  public List<String> getInputPartitionList() {
+    return inputPartitionList;
+  }
+
   /**
    * @return true if any acid resources are read/written
    */
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index b5adf1bd04..683bfff8f1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -339,6 +339,29 @@
   protected final UnparseTranslator unparseTranslator;
   private final GlobalLimitCtx globalLimitCtx;
 
+  /**
+   * Get the input partition list (after pruning) in human-readable text string format.
+   * This is mainly used for analysis purposes.
+   *
+   * To get input and output table names, please refer to
+   * {@link BaseSemanticAnalyzer#getInputs()} and {@link BaseSemanticAnalyzer#getOutputs()}
+   *
+   * @return a newly created list holding the complete name of every partition in opToPartList
+   */
+  public List<String> getInputPartitionList() {
+    List<String> result = new ArrayList<>();
+    if (opToPartList != null) {
+      for (PrunedPartitionList prunedPartitions : opToPartList.values()) {
+        if (prunedPartitions != null) {
+          for (Partition p : prunedPartitions.getPartitions()) {
+            result.add(p.getCompleteName());
+          }
+        }
+      }
+    }
+    return result;
+  }
+
   // prefix for column names auto generated by hive
   private final String autogenColAliasPrfxLbl;
   private final boolean autogenColAliasPrfxIncludeFuncName;