Index: conf/hive-default.xml
===================================================================
--- conf/hive-default.xml (revision 1042839)
+++ conf/hive-default.xml (working copy)
@@ -669,6 +669,12 @@
+   <name>hive.stats.autogather.read</name>
+   <value>false</value>
+   <description>A flag to gather statistics automatically during the SELECT command.</description>
+ </property>
+
+ <property>
  <name>hive.stats.jdbcdriver</name>
  <value>org.apache.derby.jdbc.EmbeddedDriver</value>
  <description>The JDBC driver for the database that stores temporary hive statistics.</description>
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1042839)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -284,7 +284,8 @@
HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
// Statistics
- HIVESTATSAUTOGATHER("hive.stats.autogather", true),
+ HIVESTATSAUTOGATHER("hive.stats.autogather", true), // autogather stats on write?
+ HIVESTATSAUTOGATHERREAD("hive.stats.autogather.read", false), // autogather stats on read?
HIVESTATSDBCLASS("hive.stats.dbclass",
"jdbc:derby"), // other options are jdbc:mysql and hbase as defined in StatsSetupConst.java
HIVESTATSJDBCDRIVER("hive.stats.jdbcdriver",
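
For reference, the new variable is read through the standard HiveConf accessor. A minimal sketch, assuming a HiveConf instance named conf is already in scope:

    // Sketch: consulting the two autogather flags (variable names are illustrative).
    boolean statsOnWrite = conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER);     // default: true
    boolean statsOnRead  = conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHERREAD); // default: false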
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java (working copy)
@@ -222,7 +222,7 @@
if (prunedParts == null) {
prunedParts = PartitionPruner.prune(tbl, pGraphContext
.getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
- pGraphContext.getPrunedPartitions());
+ pGraphContext.getPrunedPartitions(), pGraphContext);
pGraphContext.getOpToPartList().put(tso, prunedParts);
}
} catch (HiveException e) {
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java (working copy)
@@ -112,7 +112,7 @@
prunedPartList = PartitionPruner.prune(owc.getParseContext().getTopToTable().get(top),
ppr_pred, owc.getParseContext().getConf(),
(String) owc.getParseContext().getTopOps().keySet()
- .toArray()[0], owc.getParseContext().getPrunedPartitions());
+ .toArray()[0], owc.getParseContext().getPrunedPartitions(), owc.getParseContext());
if (prunedPartList != null) {
owc.getParseContext().getOpToPartList().put(top, prunedPartList);
}
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (working copy)
@@ -76,6 +76,7 @@
ctx.setCurrAliasId(currAliasId);
mapCurrCtx.put(op, new GenMapRedCtx(currTask, currTopOp, currAliasId));
+ currWork.setGatheringStats(true);
QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
if (parseInfo.isAnalyzeCommand()) {
@@ -88,7 +89,6 @@
Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
currTask.addDependentTask(statsTask);
ctx.getRootTasks().add(currTask);
- currWork.setGatheringStats(true);
// NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list,
// and pass it to setTaskPlan as the last parameter
Set<Partition> confirmedPartns = new HashSet<Partition>();
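
Note that currWork.setGatheringStats(true) now runs for every table scan, not only under ANALYZE (hence its removal from the ANALYZE-only branch). If one instead wanted to gate the read-side behavior on the new flag, a hypothetical guard (not part of this patch; placement illustrative) could look like:

    // Hypothetical: mark the plan as gathering stats only for ANALYZE
    // commands or when read-side autogather is enabled.
    if (parseInfo.isAnalyzeCommand()
        || parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHERREAD)) {
      currWork.setGatheringStats(true);
    }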
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (working copy)
@@ -50,6 +50,8 @@
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
/**
* The transformation step that does partition pruning.
@@ -151,7 +153,8 @@
*/
public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
HiveConf conf, String alias,
- Map<String, PrunedPartitionList> prunedPartitionsMap) throws HiveException {
+ Map<String, PrunedPartitionList> prunedPartitionsMap,
+ ParseContext parseCtx) throws HiveException {
LOG.trace("Started pruning partiton");
LOG.trace("tabname = " + tab.getTableName());
LOG.trace("prune Expression = " + prunerExpr);
@@ -237,6 +240,13 @@
// Now return the set of partitions
ret = new PrunedPartitionList(true_parts, unkn_parts, denied_parts);
prunedPartitionsMap.put(key, ret);
+
+ List<Partition> partitions = new ArrayList<Partition>();
+ partitions.addAll(true_parts);
+ partitions.addAll(unkn_parts);
+ tableSpec ts = new tableSpec(tab, tab.getTableName(), partitions);
+ parseCtx.setInputTableSpecs(alias, ts);
+
return ret;
}
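
Every call site updated by this patch follows the same pattern: the existing arguments are unchanged and the ParseContext is appended so that prune() can publish the surviving partitions as a per-alias tableSpec. The shape of a caller, with illustrative names:

    PrunedPartitionList parts = PartitionPruner.prune(
        tbl,                             // table being scanned
        prunerExpr,                      // partition-pruning predicate
        conf,                            // HiveConf
        alias,                           // table alias in the query
        parseCtx.getPrunedPartitions(),  // cache of already-pruned lists
        parseCtx);                       // new: records the tableSpec for this alias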
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy)
@@ -550,7 +550,7 @@
if (partsList == null) {
partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(),
- alias_id, parseCtx.getPrunedPartitions());
+ alias_id, parseCtx.getPrunedPartitions(), parseCtx);
parseCtx.getOpToPartList().put((TableScanOperator)topOp, partsList);
}
} catch (SemanticException e) {
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (working copy)
@@ -207,7 +207,7 @@
if (partsList == null) {
partsList = PartitionPruner.prune(destTable, pGraphContext
.getOpToPartPruner().get(ts), pGraphContext.getConf(), table,
- pGraphContext.getPrunedPartitions());
+ pGraphContext.getPrunedPartitions(), pGraphContext);
pGraphContext.getOpToPartList().put(ts, partsList);
}
} catch (HiveException e) {
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (working copy)
@@ -204,7 +204,7 @@
prunedParts = pGraphContext.getOpToPartList().get(tso);
if (prunedParts == null) {
prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
- pGraphContext.getPrunedPartitions());
+ pGraphContext.getPrunedPartitions(), pGraphContext);
pGraphContext.getOpToPartList().put(tso, prunedParts);
}
} catch (HiveException e) {
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy)
@@ -112,6 +112,7 @@
import org.apache.hadoop.hive.ql.plan.UnlockTableDesc;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
import org.apache.hadoop.hive.ql.plan.api.StageType;
+import org.apache.hadoop.hive.ql.stats.StatsSetupConst;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
@@ -1633,6 +1634,8 @@
outputFormattCls = tbl.getOutputFormatClass().getName();
}
+ String rowCount = tbl.getProperty(StatsSetupConst.ROW_COUNT);
+
String owner = tbl.getOwner();
List<FieldSchema> cols = tbl.getCols();
String ddlCols = MetaStoreUtils.getDDLFromFieldSchema("columns", cols);
@@ -1655,6 +1658,10 @@
outStream.write(terminator);
outStream.writeBytes("columns:" + ddlCols);
outStream.write(terminator);
+ if (rowCount != null) {
+   outStream.writeBytes("rows:" + rowCount);
+   outStream.write(terminator);
+ }
outStream.writeBytes("partitioned:" + isPartitioned);
outStream.write(terminator);
outStream.writeBytes("partitionColumns:" + partitionCols);
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java (working copy)
@@ -309,6 +309,10 @@
return destToLimit.get(dest);
}
+ public HashMap<String, Integer> getDestToLimit() {
+   return destToLimit;
+ }
+
/**
* @return the outerQueryLimit
*/
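
The new getter exposes the per-destination LIMIT map, presumably so that read-side stats gathering can detect scans truncated by a LIMIT (row counts collected under a LIMIT would under-count the table). A minimal usage sketch, assuming a QBParseInfo instance named parseInfo:

    HashMap<String, Integer> destToLimit = parseInfo.getDestToLimit();
    boolean anyLimit = !destToLimit.isEmpty(); // true if any destination carries a LIMIT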
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (working copy)
@@ -583,10 +583,19 @@
public static enum SpecType {TABLE_ONLY, STATIC_PARTITION, DYNAMIC_PARTITION};
public SpecType specType;
+ /* Constructor for a "dummy" tableSpec used for stats publishing */
+ public tableSpec(Table tableHandle, String tableName, List<Partition> partitions) {
+ this.tableName = tableName;
+ this.tableHandle = tableHandle;
+ this.partitions = partitions;
+ }
+
public tableSpec(Hive db, HiveConf conf, ASTNode ast)
throws SemanticException {
- assert (ast.getToken().getType() == HiveParser.TOK_TAB || ast.getToken().getType() == HiveParser.TOK_TABTYPE);
+ assert (ast.getToken().getType() == HiveParser.TOK_TAB
+ || ast.getToken().getType() == HiveParser.TOK_TABTYPE
+ || ast.getToken().getType() == HiveParser.TOK_TABREF) : ast.dump();
int childIndex = 0;
numDynParts = 0;
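
The three-argument constructor builds a "dummy" tableSpec without an AST, as PartitionPruner does above. A minimal sketch for an unpartitioned table (assumption: an empty partition list stands for the whole table):

    tableSpec ts = new tableSpec(tbl, tbl.getTableName(), new ArrayList<Partition>());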
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy)
@@ -41,6 +41,8 @@
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
+import org.apache.hadoop.hive.ql.metadata.Partition;
/**
* Parse Context: The current parse context. This is passed to the optimizer
@@ -77,6 +79,17 @@
private Map<GroupByOperator, Set<String>> groupOpToInputTables;
private Map<String, PrunedPartitionList> prunedPartitions;
+ private Map<String, tableSpec> aliasToInputTableSpecs;
+
+ public void setInputTableSpecs(String key, tableSpec ts) {
+ aliasToInputTableSpecs.put(key, ts);
+ }
+
+ public tableSpec getInputTableSpecs(String key) {
+   return aliasToInputTableSpecs.get(key);
+ }
+
/**
* The lineage information.
*/
@@ -163,6 +176,7 @@
this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer;
hasNonPartCols = false;
this.groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
+ this.aliasToInputTableSpecs = new HashMap<String, tableSpec>();
this.groupOpToInputTables = groupOpToInputTables;
this.prunedPartitions = prunedPartitions;
this.opToSamplePruner = opToSamplePruner;
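
Taken together, the ParseContext changes give the compiler a per-alias record of what each scan will actually read. A round-trip sketch with illustrative names:

    // During partition pruning: record what the scan of alias "src" covers.
    tableSpec ts = new tableSpec(tbl, tbl.getTableName(), prunedPartitions);
    parseCtx.setInputTableSpecs("src", ts);

    // Later, when wiring up stats publishing for the read side:
    tableSpec specForStats = parseCtx.getInputTableSpecs("src");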