diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
index 29a39395db..17cd14d5e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
@@ -42,19 +42,24 @@
 import org.apache.hadoop.hive.ql.exec.ExplainTask;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.ReducerTimeStatsPerJob;
 import org.apache.hadoop.hive.ql.plan.api.AdjacencyType;
 import org.apache.hadoop.hive.ql.plan.api.NodeType;
 import org.apache.hadoop.hive.ql.plan.api.TaskType;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.thrift.TException;
 import org.apache.thrift.protocol.TBinaryProtocol;
 import org.apache.thrift.protocol.TJSONProtocol;
@@ -108,6 +113,11 @@
   private transient Long queryStartTime;
   private String operationName;
 
+  /**
+   * inputPartitionList is exposed to hooks for analysis. There is no need to serialize it.
+   */
+  private transient List<String> inputPartitionList;
+
   public QueryPlan() {
     this.reducerTimeStatsPerJobList = new ArrayList();
   }
@@ -136,8 +146,29 @@ public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, S
     queryStartTime = startTime;
     this.operationName = operationName;
     this.resultSchema = resultSchema;
+
+    if (sem instanceof SemanticAnalyzer) {
+      try {
+        SemanticAnalyzer sa = (SemanticAnalyzer) sem;
+        inputPartitionList = sa.getInputPartitionList();
+      } catch (Exception e) {
+        LOG.warn("Unable to get input partition list from SemanticAnalyzer: " + StringUtils.stringifyException(e));
+      }
+    }
+    if (inputPartitionList == null) {
+      inputPartitionList = Collections.emptyList();
+    }
+  }
+
+  /**
+   * The list of input partition names (after pruning). Useful for analysis.
+   * Format is the same as partition full name: "database@table@partkey=partvalue".
+   */
+  public List<String> getInputPartitionList() {
+    return inputPartitionList;
   }
+
 
   public String getQueryStr() {
     return queryString;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 06fbf50f28..3d2dc8fe5f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -270,6 +270,27 @@
   private final UnparseTranslator unparseTranslator;
   private final GlobalLimitCtx globalLimitCtx;
 
+  /**
+   * Get the input partition list (after pruning) in human-readable text string format.
+   * This is mainly used for analysis purposes.
+   *
+   * To get input and output table names, please refer to
+   * {@link BaseSemanticAnalyzer#getInputs()} and {@link BaseSemanticAnalyzer#getOutputs()}
+   */
+  public List<String> getInputPartitionList() {
+    List<String> result = new ArrayList<>();
+    if (opToPartList != null) {
+      for (PrunedPartitionList partList : opToPartList.values()) {
+        if (partList != null) {
+          for (Partition p : partList.getPartitions()) {
+            result.add(p.getCompleteName());
+          }
+        }
+      }
+    }
+    return result;
+  }
+
   // prefix for column names auto generated by hive
   private final String autogenColAliasPrfxLbl;
   private final boolean autogenColAliasPrfxIncludeFuncName;