diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 15e6c34fa5..cd6f99988a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -3655,7 +3655,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, int numParts = 0; for (Partition partition : parts) { Map props = partition.getParameters(); - Boolean state = StatsSetupConst.areBasicStatsUptoDate(props); + Boolean state = StatsUtils.areBasicStatsUptoDate(tbl, props); for (String stat : StatsSetupConst.supportedStats) { stateMap.put(stat, stateMap.get(stat) && state); if (props != null && props.get(stat) != null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index a574372dbe..db1fc44f17 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -68,6 +68,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin; @@ -446,7 +447,7 @@ else if (udaf instanceof GenericUDAFCount) { String colName = desc.getColumn(); StatType type = getType(desc.getTypeString()); if (!tbl.isPartitioned()) { - if (!StatsSetupConst.areBasicStatsUptoDate(tbl.getParameters())) { + if (!StatsUtils.areBasicStatsUptoDate(tbl, tbl.getParameters())) { Logger.debug("Stats for table : " + tbl.getTableName() + " are not up to date."); return null; } @@ -455,7 +456,7 @@ else if (udaf instanceof GenericUDAFCount) { Logger.debug("Table doesn't have up to date stats " + tbl.getTableName()); return null; } - if (!StatsSetupConst.areColumnStatsUptoDate(tbl.getParameters(), colName)) { + if (!StatsUtils.areColumnStatsUptoDate(tbl, tbl.getParameters(), colName)) { Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName + " are not up to date."); return null; @@ -478,7 +479,7 @@ else if (udaf instanceof GenericUDAFCount) { Set parts = pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp) .getPartitions(); for (Partition part : parts) { - if (!StatsSetupConst.areBasicStatsUptoDate(part.getParameters())) { + if (!StatsUtils.areBasicStatsUptoDate(part.getTable(), part.getParameters())) { Logger.debug("Stats for part : " + part.getSpec() + " are not up to date."); return null; } @@ -516,7 +517,7 @@ else if (udaf instanceof GenericUDAFCount) { String colName = colDesc.getColumn(); StatType type = getType(colDesc.getTypeString()); if(!tbl.isPartitioned()) { - if (!StatsSetupConst.areColumnStatsUptoDate(tbl.getParameters(), colName)) { + if (!StatsUtils.areColumnStatsUptoDate(tbl, tbl.getParameters(), colName)) { Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName + " are not up to date."); return null; @@ -657,7 +658,7 @@ else if (udaf instanceof GenericUDAFCount) { String colName = colDesc.getColumn(); StatType type = getType(colDesc.getTypeString()); if (!tbl.isPartitioned()) { - if (!StatsSetupConst.areColumnStatsUptoDate(tbl.getParameters(), colName)) { + if (!StatsUtils.areColumnStatsUptoDate(tbl, tbl.getParameters(), colName)) { Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName + " are not up to date."); return null; @@ -888,7 +889,7 @@ private ColumnStatisticsData validateSingleColStat(List sta Hive hive, Table tbl, String colName, Set parts) throws TException { List partNames = new ArrayList(parts.size()); for (Partition part : parts) { - if (!StatsSetupConst.areColumnStatsUptoDate(part.getParameters(), colName)) { + if (!StatsUtils.areColumnStatsUptoDate(part.getTable(), part.getParameters(), colName)) { Logger.debug("Stats for part : " + part.getSpec() + " column " + colName + " are not up to date."); return null; @@ -910,7 +911,7 @@ private Long getRowCnt( if (tbl.isPartitioned()) { for (Partition part : pctx.getPrunedPartitions( tsOp.getConf().getAlias(), tsOp).getPartitions()) { - if (!StatsSetupConst.areBasicStatsUptoDate(part.getParameters())) { + if (!StatsUtils.areBasicStatsUptoDate(part.getTable(), part.getParameters())) { return null; } Long partRowCnt = Long.parseLong(part.getParameters().get(StatsSetupConst.ROW_COUNT)); @@ -921,7 +922,7 @@ private Long getRowCnt( rowCnt += partRowCnt; } } else { // unpartitioned table - if (!StatsSetupConst.areBasicStatsUptoDate(tbl.getParameters())) { + if (!StatsUtils.areBasicStatsUptoDate(tbl, tbl.getParameters())) { return null; } rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java index 1edef98ea9..c820088d8b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java @@ -296,8 +296,8 @@ private ColStatistics extractColStats(RexInputRef ref) { if (table != null) { ColStatistics colStats = table.getColStat(Lists.newArrayList(columnOrigin.getOriginColumnOrdinal())).get(0); - if (colStats != null && StatsSetupConst.areColumnStatsUptoDate( - table.getHiveTableMD().getParameters(), colStats.getColumnName())) { + if (colStats != null && StatsUtils.areColumnStatsUptoDate( + table.getHiveTableMD(), table.getHiveTableMD().getParameters(), colStats.getColumnName())) { return colStats; } } @@ -310,7 +310,8 @@ private Long extractRowCount(RexInputRef ref) { if (columnOrigin != null) { RelOptHiveTable table = (RelOptHiveTable) columnOrigin.getOriginTable(); if (table != null) { - if (StatsSetupConst.areBasicStatsUptoDate(table.getHiveTableMD().getParameters())) { + if (StatsUtils.areBasicStatsUptoDate(table.getHiveTableMD(), + table.getHiveTableMD().getParameters())) { return StatsUtils.getNumRows(table.getHiveTableMD()); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java index 8c23887176..8ea463cb31 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java @@ -167,7 +167,7 @@ public Object process(StatsAggregator statsAggregator) throws HiveException, Met if (statsAggregator != null) { // Update stats for transactional tables (MM, or full ACID with overwrite), even // though we are marking stats as not being accurate. - if (StatsSetupConst.areBasicStatsUptoDate(parameters) || p.isTransactionalTable()) { + if (StatsUtils.areBasicStatsUptoDate(p.getTable(), parameters) || p.isTransactionalTable()) { String prefix = getAggregationPrefix(p.getTable(), p.getPartition()); updateStats(statsAggregator, parameters, prefix, p.isAcid()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index cef87f5957..d9537de2c1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1989,4 +1989,18 @@ public static Range combineRange(Range range1, Range range2) { } return null; } + + /** + * Same as equivalent method in StatsSetupConst, but with ql.Table param + */ + public static boolean areBasicStatsUptoDate(Table table, Map params) { + return StatsSetupConst.areBasicStatsUptoDate(table.getTTable(), params); + } + + /** + * Same as equivalent method in StatsSetupConst, but with ql.Table param + */ + public static boolean areColumnStatsUptoDate(Table table, Map params, String colName) { + return StatsSetupConst.areColumnStatsUptoDate(table.getTTable(), params, colName); + } } diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 78ea01d968..c8c9f1fa62 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -23,8 +23,10 @@ import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -199,16 +201,18 @@ public Boolean deserialize(JsonParser jsonParser, } - public static boolean areBasicStatsUptoDate(Map params) { - if (params == null) { + public static boolean areBasicStatsUptoDate(Table table, Map params) { + // HIVE-19332: external tables should not be considered to have up-to-date stats. + if (params == null || MetaStoreUtils.isExternalTable(table)) { return false; } ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); return stats.basicStats; } - public static boolean areColumnStatsUptoDate(Map params, String colName) { - if (params == null) { + public static boolean areColumnStatsUptoDate(Table table, Map params, String colName) { + // HIVE-19332: external tables should not be considered to have up-to-date stats. + if (params == null || MetaStoreUtils.isExternalTable(table)) { return false; } ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));