diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
index 29a39395db..17cd14d5e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
@@ -42,19 +42,24 @@
 import org.apache.hadoop.hive.ql.exec.ExplainTask;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
 import org.apache.hadoop.hive.ql.hooks.LineageInfo;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.ReducerTimeStatsPerJob;
 import org.apache.hadoop.hive.ql.plan.api.AdjacencyType;
 import org.apache.hadoop.hive.ql.plan.api.NodeType;
 import org.apache.hadoop.hive.ql.plan.api.TaskType;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.thrift.TException;
 import org.apache.thrift.protocol.TBinaryProtocol;
 import org.apache.thrift.protocol.TJSONProtocol;
@@ -108,6 +113,11 @@
   private transient Long queryStartTime;
   private String operationName;
 
+  /**
+   * inputPartitionList is exposed to hooks for analysis. There is no need to serialize it.
+   */
+  private transient List<String> inputPartitionList;
+
   public QueryPlan() {
     this.reducerTimeStatsPerJobList = new ArrayList<ReducerTimeStatsPerJob>();
   }
@@ -136,8 +146,29 @@ public QueryPlan(String queryString, BaseSemanticAnalyzer sem, Long startTime, S
     queryStartTime = startTime;
     this.operationName = operationName;
     this.resultSchema = resultSchema;
+
+    if (sem instanceof SemanticAnalyzer) {
+      try {
+        SemanticAnalyzer sa = (SemanticAnalyzer) sem;
+        inputPartitionList = sa.getInputPartitionList();
+      } catch (Exception e) {
+        LOG.warn("Unable to get input partition list from SemanticAnalyzer: " + StringUtils.stringifyException(e));
+      }
+    }
+    if (inputPartitionList == null) {
+      inputPartitionList = Collections.EMPTY_LIST;
+    }
+  }
+
+  /**
+   * The list of input partition names (after pruning). Useful for analysis.
+   * Format is the same as a partition's full name: "database@table@partkey=partvalue".
+   */
+  public List<String> getInputPartitionList() {
+    return inputPartitionList;
   }
+
 
   public String getQueryStr() {
     return queryString;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 06fbf50f28..1382772308 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -237,7 +237,7 @@
   private static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__";
 
   private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
-  private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
+  public HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
   private HashMap<String, Operator<? extends OperatorDesc>> topOps;
   private final HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
   private LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
@@ -262,7 +262,7 @@
    */
   private final HashMap<String, SplitSample> nameToSplitSample;
   Map<GroupByOperator, Set<String>> groupOpToInputTables;
-  Map<String, PrunedPartitionList> prunedPartitions;
+  public Map<String, PrunedPartitionList> prunedPartitions;
   private List<FieldSchema> resultSchema;
   private CreateViewDesc createVwDesc;
   private ArrayList<String> viewsExpanded;
@@ -270,6 +270,24 @@
   private final UnparseTranslator unparseTranslator;
   private final GlobalLimitCtx globalLimitCtx;
 
+  /**
+   * Get the input partition list (after pruning) in human-readable string form.
+   * This is mainly used for analysis purposes.
+   */
+  public List<String> getInputPartitionList() {
+    List<String> result = new ArrayList<>();
+    if (opToPartList != null) {
+      for (Map.Entry<TableScanOperator, PrunedPartitionList> e : opToPartList.entrySet()) {
+        if (e.getValue() != null) {
+          for (Partition p : e.getValue().getPartitions()) {
+            result.add(p.getCompleteName());
+          }
+        }
+      }
+    }
+    return result;
+  }
+
   // prefix for column names auto generated by hive
   private final String autogenColAliasPrfxLbl;
   private final boolean autogenColAliasPrfxIncludeFuncName;
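Usage sketch (not part of the patch): a post-execution hook could consume the new QueryPlan.getInputPartitionList() accessor to see which pruned partitions a query actually reads. ExecuteWithHookContext, HookContext.getQueryPlan(), and QueryPlan.getQueryId() are existing Hive APIs; the hook class name below is hypothetical and purely illustrative.

// Hypothetical example, not part of this patch: logs the pruned input partitions
// of each finished query using the new QueryPlan.getInputPartitionList() accessor.
package org.example.hive.hooks;

import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;

public class InputPartitionLoggerHook implements ExecuteWithHookContext {

  private static final Log LOG = LogFactory.getLog(InputPartitionLoggerHook.class);

  @Override
  public void run(HookContext hookContext) throws Exception {
    QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null) {
      return;
    }
    // Each entry is a pruned input partition in "database@table@partkey=partvalue" form.
    List<String> partitions = plan.getInputPartitionList();
    LOG.info("Query " + plan.getQueryId() + " scanned " + partitions.size()
        + " partition(s): " + partitions);
  }
}

Such a hook would be registered through hive.exec.post.hooks; since the QueryPlan constructor falls back to an empty list when the analyzer cannot supply one, callers of getInputPartitionList() normally do not need a null check.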