Index: conf/hive-default.xml
===================================================================
--- conf/hive-default.xml	(revision 1042839)
+++ conf/hive-default.xml	(working copy)
@@ -669,6 +669,12 @@
 </property>
 
 <property>
+  <name>hive.stats.autogather.read</name>
+  <value>false</value>
+  <description>A flag to gather statistics automatically during the SELECT command.</description>
+</property>
+
+<property>
   <name>hive.stats.jdbcdriver</name>
   <value>org.apache.derby.jdbc.EmbeddedDriver</value>
   <description>The JDBC driver for the database that stores temporary hive statistics.</description>
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 1042839)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -284,7 +284,8 @@
     HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
 
     // Statistics
-    HIVESTATSAUTOGATHER("hive.stats.autogather", true),
+    HIVESTATSAUTOGATHER("hive.stats.autogather", true), // autogather stats on write?
+    HIVESTATSAUTOGATHERREAD("hive.stats.autogather.read", false), // autogather stats on read?
     HIVESTATSDBCLASS("hive.stats.dbclass", "jdbc:derby"), // other options are jdbc:mysql and hbase as defined in StatsSetupConst.java
     HIVESTATSJDBCDRIVER("hive.stats.jdbcdriver",
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java	(working copy)
@@ -222,7 +222,7 @@
         if (prunedParts == null) {
           prunedParts = PartitionPruner.prune(tbl, pGraphContext
               .getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
-              pGraphContext.getPrunedPartitions());
+              pGraphContext.getPrunedPartitions(), pGraphContext);
           pGraphContext.getOpToPartList().put(tso, prunedParts);
         }
       } catch (HiveException e) {
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrOpProcFactory.java	(working copy)
@@ -112,7 +112,7 @@
         prunedPartList = PartitionPruner.prune(owc.getParseContext().getTopToTable().get(top),
             ppr_pred, owc.getParseContext().getConf(),
             (String) owc.getParseContext().getTopOps().keySet()
-            .toArray()[0], owc.getParseContext().getPrunedPartitions());
+            .toArray()[0], owc.getParseContext().getPrunedPartitions(), owc.getParseContext());
         if (prunedPartList != null) {
           owc.getParseContext().getOpToPartList().put(top, prunedPartList);
         }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java	(working copy)
@@ -76,6 +76,7 @@
     ctx.setCurrAliasId(currAliasId);
     mapCurrCtx.put(op, new GenMapRedCtx(currTask, currTopOp, currAliasId));
+    currWork.setGatheringStats(true);
 
     QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
     if (parseInfo.isAnalyzeCommand()) {
@@ -88,7 +89,6 @@
       Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseCtx.getConf());
       currTask.addDependentTask(statsTask);
       ctx.getRootTasks().add(currTask);
-      currWork.setGatheringStats(true);
      // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list,
      // and pass it to setTaskPlan as the last parameter
      Set<Partition> confirmedPartns = new HashSet<Partition>();
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java	(working copy)
@@ -50,6 +50,8 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
 
 /**
  * The transformation step that does partition pruning.
@@ -151,7 +153,8 @@
    */
   public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
       HiveConf conf, String alias,
-      Map<String, PrunedPartitionList> prunedPartitionsMap) throws HiveException {
+      Map<String, PrunedPartitionList> prunedPartitionsMap,
+      ParseContext parseCtx) throws HiveException {
     LOG.trace("Started pruning partiton");
     LOG.trace("tabname = " + tab.getTableName());
     LOG.trace("prune Expression = " + prunerExpr);
@@ -237,6 +240,13 @@
     // Now return the set of partitions
     ret = new PrunedPartitionList(true_parts, unkn_parts, denied_parts);
     prunedPartitionsMap.put(key, ret);
+
+    List<Partition> partitions = new ArrayList<Partition>();
+    partitions.addAll(true_parts);
+    partitions.addAll(unkn_parts);
+    tableSpec ts = new tableSpec(tab, tab.getTableName(), partitions);
+    parseCtx.setInputTableSpecs(alias, ts);
+
     return ret;
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(working copy)
@@ -550,7 +550,7 @@
       if (partsList == null) {
         partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
             parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(),
-            alias_id, parseCtx.getPrunedPartitions());
+            alias_id, parseCtx.getPrunedPartitions(), parseCtx);
         parseCtx.getOpToPartList().put((TableScanOperator)topOp, partsList);
       }
     } catch (SemanticException e) {
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java	(working copy)
@@ -207,7 +207,7 @@
       if (partsList == null) {
         partsList = PartitionPruner.prune(destTable, pGraphContext
             .getOpToPartPruner().get(ts), pGraphContext.getConf(), table,
-            pGraphContext.getPrunedPartitions());
+            pGraphContext.getPrunedPartitions(), pGraphContext);
         pGraphContext.getOpToPartList().put(ts, partsList);
       }
     } catch (HiveException e) {
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java	(working copy)
@@ -204,7 +204,7 @@
       prunedParts = pGraphContext.getOpToPartList().get(tso);
       if (prunedParts == null) {
        prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), pGraphContext.getConf(), alias,
-            pGraphContext.getPrunedPartitions());
+            pGraphContext.getPrunedPartitions(), pGraphContext);
         pGraphContext.getOpToPartList().put(tso, prunedParts);
       }
     } catch (HiveException e) {
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java	(working copy)
@@ -112,6 +112,7 @@
 import org.apache.hadoop.hive.ql.plan.UnlockTableDesc;
 import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
+import org.apache.hadoop.hive.ql.stats.StatsSetupConst;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
@@ -1633,6 +1634,8 @@
         outputFormattCls = tbl.getOutputFormatClass().getName();
       }
 
+      String rowCount = tbl.getProperty(StatsSetupConst.ROW_COUNT);
+
       String owner = tbl.getOwner();
       List<FieldSchema> cols = tbl.getCols();
       String ddlCols = MetaStoreUtils.getDDLFromFieldSchema("columns", cols);
@@ -1655,6 +1658,10 @@
       outStream.write(terminator);
       outStream.writeBytes("columns:" + ddlCols);
       outStream.write(terminator);
+      if (rowCount != null) {
+        outStream.writeBytes("rows:" + rowCount);
+        outStream.write(terminator);
+      }
       outStream.writeBytes("partitioned:" + isPartitioned);
       outStream.write(terminator);
       outStream.writeBytes("partitionColumns:" + partitionCols);
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java	(working copy)
@@ -309,6 +309,10 @@
     return destToLimit.get(dest);
   }
 
+  public HashMap<String, Integer> getDestToLimit() {
+    return destToLimit;
+  }
+
   /**
    * @return the outerQueryLimit
    */
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java	(working copy)
@@ -583,10 +583,19 @@
     public static enum SpecType {TABLE_ONLY, STATIC_PARTITION, DYNAMIC_PARTITION};
     public SpecType specType;
 
+    /* Constructor for a "dummy" tableSpec used for stats publishing */
+    public tableSpec(Table tableHandle, String tableName, List<Partition> partitions) {
+      this.tableName = tableName;
+      this.tableHandle = tableHandle;
+      this.partitions = partitions;
+    }
+
     public tableSpec(Hive db, HiveConf conf, ASTNode ast)
         throws SemanticException {
-      assert (ast.getToken().getType() == HiveParser.TOK_TAB || ast.getToken().getType() == HiveParser.TOK_TABTYPE);
+      assert (ast.getToken().getType() == HiveParser.TOK_TAB
+          || ast.getToken().getType() == HiveParser.TOK_TABTYPE
+          || ast.getToken().getType() == HiveParser.TOK_TABREF) : ast.dump();
 
       int childIndex = 0;
       numDynParts = 0;
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java	(revision 1042839)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java	(working copy)
@@ -41,6 +41,8 @@
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
+import org.apache.hadoop.hive.ql.metadata.Partition;
 
 /**
  * Parse Context: The current parse context. This is passed to the optimizer
@@ -77,6 +79,17 @@
   private Map<GroupByOperator, Set<String>> groupOpToInputTables;
   private Map<String, PrunedPartitionList> prunedPartitions;
+  private Map<String, tableSpec> aliasToInputTableSpecs;
+
+  public void setInputTableSpecs(String key, tableSpec ts) {
+    aliasToInputTableSpecs.put(key, ts);
+  }
+
+  public tableSpec getInputTableSpecs(String key) {
+    tableSpec ts = aliasToInputTableSpecs.get(key);
+    return ts;
+  }
+
   /**
    * The lineage information.
    */
@@ -163,6 +176,7 @@
     this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer;
     hasNonPartCols = false;
     this.groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
+    this.aliasToInputTableSpecs = new HashMap<String, tableSpec>();
     this.groupOpToInputTables = groupOpToInputTables;
     this.prunedPartitions = prunedPartitions;
     this.opToSamplePruner = opToSamplePruner;
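
Reviewer note: a minimal sketch of how the pieces added by this patch are intended to interact at an optimizer call site. Everything referenced below exists in the patched tree; the wrapper method and its parameters (pctx, tbl, prunerExpr, alias) are hypothetical scaffolding standing in for the real call sites patched above, not committed code.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

public class ReadStatsSketch {
  // Hypothetical stand-in for an optimizer call site such as GenMapRedUtils.
  static PrunedPartitionList pruneAndRecord(ParseContext pctx, Table tbl,
      ExprNodeDesc prunerExpr, String alias) throws HiveException {
    HiveConf conf = pctx.getConf();

    // New flag from this patch: also gather stats on SELECT (read); off by default.
    boolean gatherOnRead =
        conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHERREAD);

    // prune() now takes the ParseContext as an extra argument so it can record,
    // per alias, a "dummy" tableSpec holding the table plus its pruned
    // (confirmed + unknown) partitions.
    PrunedPartitionList partsList = PartitionPruner.prune(
        tbl, prunerExpr, conf, alias, pctx.getPrunedPartitions(), pctx);

    if (gatherOnRead) {
      // Read-side stats publishing can later look the spec up by alias; the
      // row counts stored under StatsSetupConst.ROW_COUNT are what DDLTask's
      // new "rows:" field surfaces in DESCRIBE EXTENDED output.
      tableSpec ts = pctx.getInputTableSpecs(alias);
    }
    return partsList;
  }
}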