diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
index 7ed50b4..8dcf444 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
@@ -19,7 +19,9 @@
 package org.apache.hadoop.hive.ql.hooks;
 
 import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Set;
 
 import org.apache.hadoop.fs.Path;
@@ -49,7 +51,7 @@
 
   // For views, the entities can be nested - by default, entities are at the top level
   private final Set<ReadEntity> parents = new HashSet<ReadEntity>();
-
+  private List<String> accessedColumns = new ArrayList<String>();
 
   /**
    * For serialization only.
@@ -159,4 +161,14 @@ public boolean needsLock() {
   public void noLockNeeded() {
     needsLock = false;
   }
+
+  /** Returns the names of the columns accessed through this entity. */
+  public List<String> getAccessedColumns() {
+    return accessedColumns;
+  }
+
+  /** Sets the names of the columns accessed through this entity. */
+  public void setAccessedColumns(List<String> accessedColumns) {
+    this.accessedColumns = accessedColumns;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 7a71ec7..490f94c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9511,6 +9511,11 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
 
     LOG.info("Completed plan generation");
 
+    // put accessed columns to readEntity
+    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
+      putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
+    }
+
     if (!ctx.getExplain()) {
       // if desired check we're not going over partition scan limits
       enforceScanLimits(pCtx, origFetchTask);
@@ -9519,6 +9524,35 @@ public void analyzeInternal(ASTNode ast) throws SemanticException {
     return;
   }
 
+  /**
+   * Copies the per-table accessed-column lists collected in columnAccessInfo
+   * into the matching TABLE/PARTITION read entities so downstream hooks can
+   * see which columns each input actually touched.
+   */
+  private void putAccessedColumnsToReadEntity(HashSet<ReadEntity> inputs, ColumnAccessInfo columnAccessInfo) {
+    Map<String, List<String>> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap();
+    if (tableToColumnAccessMap != null && !tableToColumnAccessMap.isEmpty()) {
+      for (ReadEntity entity : inputs) {
+        List<String> cols;
+        switch (entity.getType()) {
+        case TABLE:
+          cols = tableToColumnAccessMap.get(entity.getTable().getCompleteName());
+          if (cols != null && !cols.isEmpty()) {
+            entity.setAccessedColumns(cols);
+          }
+          break;
+        case PARTITION:
+          // partition inputs are keyed by their owning table's complete name
+          cols = tableToColumnAccessMap.get(entity.getPartition().getTable().getCompleteName());
+          if (cols != null && !cols.isEmpty()) {
+            entity.setAccessedColumns(cols);
+          }
+          break;
+        default:
+          // no-op
+        }
+      }
+    }
+  }
+
   private void enforceScanLimits(ParseContext pCtx, FetchTask fTask)
       throws SemanticException {
     int scanLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITTABLESCANPARTITION);