diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
index 4a760109041e0fb2b816e79b76cf30cb6a82845e..5e9917af1b54786fab3217a35a495d2eea8c1c2b 100644
--- hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
@@ -424,9 +424,7 @@ private String getFinalDynamicPartitionDestination(Table table, Map<String, String> partKVs,
     for (Map.Entry<Object, Object> entry : storer.getProperties().entrySet()) {
-      if (!entry.getKey().toString().equals(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
-        params.put(entry.getKey().toString(), entry.getValue().toString());
-      }
+      params.put(entry.getKey().toString(), entry.getValue().toString());
     }
     return params;
   }
@@ -761,11 +759,9 @@ private void registerPartitions(JobContext context) throws IOException{
       if (!src.equals(tblPath)) {
         fs.delete(src, true);
       }
-      if (table.getParameters() != null
-          && table.getParameters().containsKey(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
-        table.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
-        client.alter_table(table.getDbName(), table.getTableName(), table.getTTable());
-      }
+      // Won't work for an ACID table.
+      client.alterTableBasicStats(table.getCatName(), table.getDbName(), table.getTableName(), false, null, 0, null);
+      client.invalidateAllColumnStats(table.getCatName(), table.getDbName(), table.getTableName(), null, 0);
       return;
     }
diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
index e16674d99fd09602b3454adfbcffbccfba542ea4..b433a819598c93c9104573b3fd057bed1973dea9 100644
--- hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
+++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
@@ -216,7 +216,7 @@ public void createTable() throws Exception {
     if (isTableImmutable()){
       tableParams.put(hive_metastoreConstants.IS_IMMUTABLE,"true");
     }
-    StatsSetupConst.setBasicStatsState(tableParams, StatsSetupConst.TRUE);
+    tbl.setIsStatsCompliant(true);
     tableParams.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "false");
     tbl.setParameters(tableParams);
diff --git itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
index be40395cc36169791084178f52774647ec0e3685..431f542f146941ee3db239b24eaeb1e5f995b387 100644
--- itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
+++ itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
@@ -1304,10 +1304,31 @@ public int deleteRuntimeStats(int maxRetainSecs) throws MetaException {
   }
 
   @Override
-  public Map<String, List<String>> getPartitionColsWithStats(String catName,
-      String dbName, String tableName) throws MetaException,
+  public Map<String, List<String>> getPartitionColsWithAccurateStats(String catName,
+      String dbName, String tableName, String validWriteId, boolean isAccurate) throws MetaException,
       NoSuchObjectException {
     return null;
   }
+
+  @Override
+  public Table alterTableBasicStats(String catName, String dbname, String name,
+      Map<String, String> stats, boolean isAccurate, long writeId,
+      String validWriteIds) throws MetaException {
+    return null;
+  }
+
+  @Override
+  public Partition alterPartitionBasicStats(String catName, String dbname,
+      String name, List<String> partVals, Map<String, String> stats,
+      boolean isAccurate, long writeId, String validWriteIds)
+      throws MetaException {
+    return null;
+  }
+
+  @Override
+  public Map<String, List<String>> invalidateAllColumnStatistics(String catName,
+      String dbName, String tblName, List<String> partNames, long writeId)
+      throws MetaException, NoSuchObjectException {
+    return null;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 939ef360c27d067fcbbecaec697c09ab6b3613f0..ecede96b7e0d0a3f4889f0c319cfc20ad0840eea 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -57,6 +57,7 @@
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.util.concurrent.ListenableFuture;
+
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -245,6 +246,7 @@
 import org.apache.hadoop.hive.ql.plan.TruncateTableDesc;
 import org.apache.hadoop.hive.ql.plan.UnlockDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.UnlockTableDesc;
+import org.apache.hadoop.hive.ql.plan.UpdateStatsDesc;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils;
 import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationTranslator;
@@ -427,6 +429,11 @@ public int execute(DriverContext driverContext) {
         return addPartitions(db, addPartitionDesc);
       }
 
+      UpdateStatsDesc updateStatsDesc = work.getUpdateStatsDesc();
+      if (updateStatsDesc != null) {
+        return updateStats(db, updateStatsDesc);
+      }
+
       RenamePartitionDesc renamePartitionDesc = work.getRenamePartitionDesc();
       if (renamePartitionDesc != null) {
         return renamePartition(db, renamePartitionDesc);
@@ -667,6 +674,18 @@ public int execute(DriverContext driverContext) {
     return 0;
   }
 
+  private int updateStats(Hive db, UpdateStatsDesc desc) throws HiveException {
+    switch (desc.getOpType()) {
+    case INVALIDATE_ALL: {
+      db.invalidateStats(desc.getCatName(), desc.getDbName(),
+          desc.getTableName(), desc.getPartName(), true, true);
+      break;
+    }
+    default: throw new AssertionError(desc.getOpType());
+    }
+    return 0;
+  }
+
   private int createResourcePlan(Hive db, CreateResourcePlanDesc createResourcePlanDesc)
       throws HiveException {
     db.createResourcePlan(createResourcePlanDesc.getResourcePlan(),
@@ -3648,7 +3667,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException,
       int numParts = 0;
       for (Partition partition : parts) {
         Map<String, String> props = partition.getParameters();
-        Boolean state = StatsSetupConst.areBasicStatsUptoDate(props);
+        Boolean state = partition.getTPartition().isIsStatsCompliant();
         for (String stat : StatsSetupConst.SUPPORTED_STATS) {
           stateMap.put(stat, stateMap.get(stat) && state);
           if (props != null && props.get(stat) != null) {
@@ -3658,8 +3677,8 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException,
         numParts++;
       }
       for (String stat : StatsSetupConst.SUPPORTED_STATS) {
-        StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat)));
         tblProps.put(stat, valueMap.get(stat).toString());
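+        // The aggregated COLUMN_STATS_ACCURATE parameter is gone, so per-stat accuracy
+        // is surfaced under a separate " ACCURATE"-suffixed key in the describe output.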
+        tblProps.put(stat + " ACCURATE", stateMap.get(stat).toString());
       }
       tblProps.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(numParts));
       tbl.setParameters(tblProps);
@@ -3688,18 +3707,12 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException,
           cs.getNumNulls(), cs.getCountDistint(), null, cs.getAvgColLen(), cs.getAvgColLen(),
           cs.getNumTrues(), cs.getNumFalses());
       ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
       colStats = Collections.singletonList(cso);
-      StatsSetupConst.setColumnStatsState(tblProps, colNames);
     } else {
       cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
       List<String> parts = db.getPartitionNames(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), (short) -1);
       AggrStats aggrStats = db.getAggrColStatsFor(
           dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames, parts, false);
       colStats = aggrStats.getColStats();
-      if (parts.size() == aggrStats.getPartsFound()) {
-        StatsSetupConst.setColumnStatsState(tblProps, colNames);
-      } else {
-        StatsSetupConst.removeColumnStatsState(tblProps, colNames);
-      }
     }
     tbl.setParameters(tblProps);
   } else {
@@ -5078,8 +5091,7 @@ private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exception {
     if (crtTbl.getLocation() == null && !tbl.isPartitioned()
         && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
-      StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters(),
-          MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
+      StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters());
     }
 
     // create the table
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 7818efbbf540d2c159a6f12301621fcd232d27ea..f054c8a60eceeefd73d34b4cad854f835eecd8e0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -33,7 +33,9 @@
 import java.util.regex.Pattern;
 
 import org.apache.avro.generic.GenericData;
+
 import com.google.common.base.Preconditions;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -1680,6 +1682,11 @@ public static TableSnapshot getTableSnapshot(Configuration conf,
     if (tblName == null) {
       tblName = tbl.getTableName();
     }
+    return getTableSnapshotForTxnTable(conf, dbName, tblName, isStatsUpdater);
+  }
+
+  public static TableSnapshot getTableSnapshotForTxnTable(Configuration conf,
+      String dbName, String tblName, boolean isStatsUpdater) throws LockException, AssertionError {
     long writeId = -1;
     ValidWriteIdList validWriteIdList = null;
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
index 1b11e0e762213abc32634a966bba4b6aa7a2f07c..e1bcb48a8afc4bd889a2f28da2f3ee20484a4c78 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/encoded/EncodedReaderImpl.java
@@ -1004,7 +1004,7 @@ private CacheChunk prepareRangesForCompressedRead(long cOffset, long endCOffset,
       if (current instanceof CacheChunk) {
         // 2a. This is a decoded compression buffer, add as is.
         CacheChunk cc = (CacheChunk)current;
-        if (isTracingEnabled) { // TODO# HERE unaccompanied lock
+        if (isTracingEnabled) {
           LOG.trace("Locking " + cc.getBuffer() + " due to reuse");
         }
         cacheWrapper.reuseBuffer(cc.getBuffer());
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 239a606fdb730d10475cd85814ba345c93e42d58..82915c657ec14fc76faf990e213fbde41e188521 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -126,6 +126,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType;
+import org.apache.hadoop.hive.ql.plan.UpdateStatsDesc;
 import org.apache.hadoop.hive.ql.session.CreateTableAutomaticGrant;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.Deserializer;
@@ -1908,9 +1909,7 @@ public Partition loadPartition(Path loadPath, Table tbl, Map<String, String> partSpec,
       }
 
       // column stats will be inaccurate
-      if (!hasFollowingStatsTask) {
-        StatsSetupConst.clearColumnStatsState(newTPart.getParameters());
-      }
+      boolean areColStatsInvalid = !hasFollowingStatsTask;
 
       // recreate the partition if it existed before
       if (isSkewedStoreAsSubdir) {
@@ -1923,14 +1922,11 @@ public Partition loadPartition(Path loadPath, Table tbl, Map<String, String> partSpec,
         skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
         newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
       }
-      if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
-        StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE);
-      }
+      boolean areBasicStatsInvalid = !this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER);
       if (oldPart == null) {
         newTPart.getTPartition().setParameters(new HashMap<String, String>());
         if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
-          StatsSetupConst.setStatsStateForCreateTable(newTPart.getParameters(),
-              MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
+          StatsSetupConst.setStatsStateForCreateTable(newTPart.getParameters());
         }
         // Note: we are creating a brand new the partition, so this is going to be valid for ACID.
         List<Path> filesForStats = null;
@@ -1963,6 +1959,9 @@ public Partition loadPartition(Path loadPath, Table tbl, Map<String, String> partSpec,
           // In that case, we want to retry with alterPartition.
           LOG.debug("Caught AlreadyExistsException, trying to alter partition instead");
           setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart, tableSnapshot);
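+          // alterPartition only updates the basic parameters; marking stats invalid is
+          // now a separate metastore call (see Hive.invalidateStats later in this patch).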
LOG.debug("Caught AlreadyExistsException, trying to alter partition instead"); setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart, tableSnapshot); + if (areBasicStatsInvalid || areColStatsInvalid) { + invalidatePartStats(newTPart, areColStatsInvalid, areBasicStatsInvalid); + } } catch (Exception e) { try { final FileSystem newPathFileSystem = newPartPath.getFileSystem(this.getConf()); @@ -1982,6 +1981,9 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par } } else { setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart, tableSnapshot); + if (areBasicStatsInvalid || areColStatsInvalid) { + invalidatePartStats(newTPart, areColStatsInvalid, areBasicStatsInvalid); + } } perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_PARTITION); @@ -2001,7 +2003,6 @@ public Partition loadPartition(Path loadPath, Table tbl, Map par } } - private static Path genPartPathFromTable(Table tbl, Map partSpec, Path tblDataLocationPath) throws MetaException { Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec)); @@ -2500,14 +2501,8 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType } perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES); } - if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { - StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE); - } - - //column stats will be inaccurate - if (!hasFollowingStatsTask) { - StatsSetupConst.clearColumnStatsState(tbl.getParameters()); - } + boolean areBasicStatsInvalid = !this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER); + boolean areColStatsInvalid = !hasFollowingStatsTask; try { if (isSkewedStoreAsSubdir) { @@ -2530,6 +2525,9 @@ public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType } alterTable(tbl, false, environmentContext, true); + if (areBasicStatsInvalid || areColStatsInvalid) { + invalidateTblStats(tbl, areBasicStatsInvalid, areColStatsInvalid); + } if (AcidUtils.isTransactionalTable(tbl)) { addWriteNotificationLog(tbl, null, newFiles, writeId); @@ -2572,12 +2570,16 @@ public Partition createPartition(Table tbl, Map partSpec) throws new ArrayList(size); AcidUtils.TableSnapshot tableSnapshot = AcidUtils.getTableSnapshot(conf, tbl); + Boolean isAccurate = addPartitionDesc.areBasicStatsAccurate(); for (int i = 0; i < size; ++i) { org.apache.hadoop.hive.metastore.api.Partition tmpPart = convertAddSpecToMetaPartition(tbl, addPartitionDesc.getPartition(i), conf); if (tmpPart != null && tableSnapshot != null && tableSnapshot.getWriteId() > 0) { tmpPart.setWriteId(tableSnapshot.getWriteId()); } + if (isAccurate != null) { + tmpPart.setIsStatsCompliant(isAccurate); + } in.add(tmpPart); } List out = new ArrayList(); @@ -5452,4 +5454,96 @@ public StorageHandlerInfo getStorageHandlerInfo(Table table) throw new HiveException(e); } } + + public void invalidateTblStats( + Table tbl, boolean areBasicStatsInvalid, boolean areColStatsInvalid) throws HiveException { + invalidateStats(tbl.getCatalogName(), tbl.getDbName(), tbl.getTableName(), + null, areBasicStatsInvalid, areColStatsInvalid); + } + + private void invalidatePartStats( + Partition part, boolean areColStatsInvalid, boolean areBasicStatsInvalid) throws HiveException { + Table tbl = part.getTable(); + invalidateStats(tbl.getCatalogName(), tbl.getDbName(), tbl.getTableName(), + part.getName(), areBasicStatsInvalid, areColStatsInvalid); + } + + + public void invalidateStats(String catName, String dbName, String tableName, String 
+  public void invalidateStats(String catName, String dbName, String tableName, String partName,
+      boolean areBasicStatsInvalid, boolean areColStatsInvalid) throws HiveException {
+    if (catName == null) {
+      catName = getDefaultCatalog(conf);
+    }
+    try {
+      AcidUtils.TableSnapshot tableSnapshot = ensureSnapshot(catName, dbName, tableName);
+
+      long writeId = tableSnapshot == null ? 0 : tableSnapshot.getWriteId();
+      if (areBasicStatsInvalid) {
+        if (partName == null) {
+          getMSC().alterTableBasicStats(catName, dbName, tableName, false, null,
+              writeId, tableSnapshot == null ? null : tableSnapshot.getValidWriteIdList());
+        } else {
+          getMSC().alterPartitionBasicStats(catName, dbName, tableName, partName, false, null,
+              writeId, tableSnapshot == null ? null : tableSnapshot.getValidWriteIdList());
+        }
+      }
+      if (areColStatsInvalid) {
+        getMSC().invalidateAllColumnStats(catName, dbName, tableName, null, writeId);
+      }
+    } catch (TException e) {
+      throw new HiveException("Unable to update stats; " + e.getMessage(), e);
+    }
+  }
+
+  public void updateBasicTableStats(String catName, String dbName,
+      String tableName, boolean isAccurate, Map<String, String> newStats) throws HiveException {
+    if (catName == null) {
+      catName = getDefaultCatalog(conf);
+    }
+    try {
+      AcidUtils.TableSnapshot tableSnapshot = ensureSnapshot(catName, dbName, tableName);
+      getMSC().alterTableBasicStats(catName, dbName, tableName, isAccurate, newStats,
+          tableSnapshot == null ? 0 : tableSnapshot.getWriteId(),
+          tableSnapshot == null ? null : tableSnapshot.getValidWriteIdList());
+    } catch (TException e) {
+      throw new HiveException("Unable to update stats; " + e.getMessage(), e);
+    }
+  }
+
+  private AcidUtils.TableSnapshot ensureSnapshot(String catName, String dbName, String tableName)
+      throws MetaException, TException, LockException, AssertionError {
+    org.apache.hadoop.hive.metastore.api.Table tbl = getMSC().getTable(catName, dbName, tableName);
+    if (!AcidUtils.isTransactionalTable(tbl)) return null;
+    AcidUtils.TableSnapshot tableSnapshot = AcidUtils.getTableSnapshotForTxnTable(
+        conf, dbName, tableName, true);
+    if (tableSnapshot == null) {
+      LOG.warn("Cannot get a table snapshot for " + tableName);
+    }
+    return tableSnapshot;
+  }
+
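+  /**
+   * Pushes new basic stats (row counts, file counts, sizes) for a set of partitions;
+   * partNames and partStats are parallel lists.
+   */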
+  public void updateBasicPartitionsStats(String catName, String dbName,
+      String tableName, boolean isAccurate, List<String> partNames,
+      List<Map<String, String>> partStats) throws HiveException {
+    if (catName == null) {
+      catName = getDefaultCatalog(conf);
+    }
+    try {
+      AcidUtils.TableSnapshot tableSnapshot = ensureSnapshot(catName, dbName, tableName);
+      long writeId = tableSnapshot == null ? 0 : tableSnapshot.getWriteId();
+      String validWriteIds = tableSnapshot == null ? null : tableSnapshot.getValidWriteIdList();
+      for (int i = 0; i < partNames.size(); ++i) {
+        // TODO: change the API to a bulk call? not using ColumnStatsDesc.
+        getMSC().alterPartitionBasicStats(catName, dbName, tableName, partNames.get(i),
+            isAccurate, partStats.get(i), writeId, validWriteIds);
+      }
+    } catch (TException e) {
+      throw new HiveException("Unable to update stats; " + e.getMessage(), e);
+    }
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
index 3240f2d31597741ee8b32db7fc4b07b8c5413a5e..64144610326f4bce567ca6d33dc8ccdcf6c8c140 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
@@ -504,8 +504,7 @@ private void createTempTable(org.apache.hadoop.hive.metastore.api.Table tbl,
     // Add temp table info to current session
     Table tTable = new Table(tbl);
     if (!isVirtualTable) {
-      StatsSetupConst.setStatsStateForCreateTable(tbl.getParameters(),
-          org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getColumnNamesForTable(tbl), StatsSetupConst.TRUE);
+      StatsSetupConst.setStatsStateForCreateTable(tbl.getParameters());
     }
     if (tables == null) {
       tables = new HashMap<String, Table>();
     }
@@ -627,11 +626,6 @@ private boolean needToUpdateStats(Map<String, String> props, EnvironmentContext environmentContext) {
         props.put(stat, "0");
       }
     }
-    //first set basic stats to true
-    StatsSetupConst.setBasicStatsState(props, StatsSetupConst.TRUE);
-    environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
-    //then invalidate column stats
-    StatsSetupConst.clearColumnStatsState(props);
     return statsPresent;
   }
 
@@ -664,6 +658,8 @@ private void truncateTempTable(org.apache.hadoop.hive.metastore.api.Table table) {
       EnvironmentContext environmentContext = new EnvironmentContext();
       if (needToUpdateStats(table.getParameters(), environmentContext)) {
+        // Use alter table here, since this is a temp table.
+        table.setIsStatsCompliant(true);
         alter_table_with_environmentContext(table.getDbName(), table.getTableName(), table,
             environmentContext);
       }
     } catch (Exception e) {
@@ -797,9 +793,9 @@ private boolean updateTempTableColumnStats(String dbName, String tableName,
     List<String> colNames = new ArrayList<>();
     for (ColumnStatisticsObj obj : colStats.getStatsObj()) {
       colNames.add(obj.getColName());
+      obj.setIsStatsCompliant(true);
     }
     org.apache.hadoop.hive.metastore.api.Table table = getTempTable(dbName, tableName);
-    StatsSetupConst.setColumnStatsState(table.getParameters(), colNames);
     return true;
   }
 
@@ -810,6 +806,7 @@ private static void mergeColumnStats(Map<String, ColumnStatisticsObj> oldStats,
     for (ColumnStatisticsObj colStat : newColList) {
       // This is admittedly a bit simple, StatsObjectConverter seems to allow
       // old stats attributes to be kept if the new values do not overwrite them.
+      // TODO: ...and it is probably incorrect
       oldStats.put(colStat.getColName().toLowerCase(), colStat);
     }
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index 36cd46aa43f23f482744e7d98a71dd9905d319b0..df3cb7f728a2c6d10456cb249a931399e23388f4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -640,6 +640,7 @@ static void formatOutput(String name, String value, StringBuilder tableInfo,
     List<String> ret = new ArrayList<>();
     ret.add(col.getName());
     ret.add(col.getType());
+    ret.add((columnStatisticsObj != null && columnStatisticsObj.isIsStatsCompliant() ? "" : "not ") + "accurate");
 
     if (isColStatsAvailable) {
       if (columnStatisticsObj != null) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
index 705365b74c3f9dff4f9fdf01e77027fa9d158101..cecb859126a1482d3dc0799a457c311520a941c2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
@@ -169,12 +169,10 @@ public void describeTable(DataOutputStream outStream, String colPath,
             output += mdt.renderTable(isOutputPadded);
           }
         } else {
-          String statsState;
-          if (tbl.getParameters() != null && (statsState = tbl.getParameters().get(StatsSetupConst.COLUMN_STATS_ACCURATE)) != null) {
+          if (tbl.getTTable().isSetIsStatsCompliant()) {
             StringBuilder str = new StringBuilder();
-            MetaDataFormatUtils.formatOutput(StatsSetupConst.COLUMN_STATS_ACCURATE,
-                isFormatted ? StringEscapeUtils.escapeJava(statsState) : HiveStringUtils.escapeJava(statsState),
-                str, isOutputPadded);
+            MetaDataFormatUtils.formatOutput("Basic stats accurate",
+                Boolean.toString(tbl.getTTable().isIsStatsCompliant()), str, isOutputPadded);
             output = output.concat(str.toString());
           }
         }
@@ -610,7 +608,7 @@ public void showResourcePlans(DataOutputStream out, List<WMResourcePlan> resourcePlans)
   /**
    * Class to print text records for resource plans in the following format:
-   * 
+   *
    * <rp_name>[status=<status>,parallelism=<parallelism>,defaultPool=<defaultPool>]
    *     <pool_path>[allocFraction=<fraction>,schedulingPolicy=<policy>,parallelism=<parallelism>]
    *       > <trigger_name>: if(<condition>){<action>}
   */
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index b8d43751126b291a87913d5b6c82fad2412885ed..43735b93ae204e6deb96eabd35be22b21e7e5d92 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -465,11 +465,6 @@ else if (udaf instanceof GenericUDAFCount) {
             Logger.debug("Table doesn't have up to date stats " + tbl.getTableName());
             return null;
           }
-          if (!StatsUtils.areColumnStatsUptoDateForQueryAnswering(tbl, tbl.getParameters(), colName)) {
-            Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName
-                + " are not up to date.");
-            return null;
-          }
           List<ColumnStatisticsObj> stats =
               hive.getMSC().getTableColumnStatistics(
@@ -480,7 +475,13 @@ else if (udaf instanceof GenericUDAFCount) {
             Logger.debug("No stats for " + tbl.getTableName() + " column " + colName);
             return null;
           }
-          Long nullCnt = getNullcountFor(type, stats.get(0).getStatsData());
+          ColumnStatisticsObj stat = stats.get(0);
+          if (!StatsUtils.areColumnStatsUptoDateForQueryAnswering(tbl, stat)) {
+            Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName
+                + " are not up to date.");
+            return null;
+          }
+          Long nullCnt = getNullcountFor(type, stat.getStatsData());
           if (null == nullCnt) {
             Logger.debug("Unsupported type: " + desc.getTypeString() + " encountered in "
                 + "metadata optimizer for column : " + colName);
@@ -529,13 +530,7 @@ else if (udaf instanceof GenericUDAFCount) {
           ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn());
           String colName = colDesc.getColumn();
           StatType type = getType(colDesc.getTypeString());
-          if(!tbl.isPartitioned()) {
-            if (!StatsUtils.areColumnStatsUptoDateForQueryAnswering(tbl, tbl.getParameters(), colName)) {
-              Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName
-                  + " are not up to date.");
-              return null;
-            }
-
+          if (!tbl.isPartitioned()) {
             List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
                 tbl.getDbName(), tbl.getTableName(),
@@ -546,6 +541,12 @@ else if (udaf instanceof GenericUDAFCount) {
               return null;
             }
             ColumnStatisticsData statData = stats.get(0).getStatsData();
+            if (!StatsUtils.areColumnStatsUptoDateForQueryAnswering(tbl, stats.get(0))) {
+              Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName
+                  + " are not up to date.");
+              return null;
+            }
+
             String name = colDesc.getTypeString().toUpperCase();
             switch (type) {
             case Integer: {
@@ -675,17 +676,23 @@ else if (udaf instanceof GenericUDAFCount) {
           String colName = colDesc.getColumn();
           StatType type = getType(colDesc.getTypeString());
           if (!tbl.isPartitioned()) {
-            if (!StatsUtils.areColumnStatsUptoDateForQueryAnswering(tbl, tbl.getParameters(), colName)) {
+            List<ColumnStatisticsObj> stats =
+                hive.getMSC().getTableColumnStatistics(
+                    tbl.getDbName(), tbl.getTableName(), Lists.newArrayList(colName),
+                    tableSnapshot != null ? tableSnapshot.getValidWriteIdList() : null);
+            if (stats.isEmpty()) {
+              return null;
+            }
+
+            if (!StatsUtils.areColumnStatsUptoDateForQueryAnswering(tbl, stats.get(0))) {
               Logger.debug("Stats for table : " + tbl.getTableName() + " column " + colName
                   + " are not up to date.");
               return null;
             }
-            ColumnStatisticsData statData =
-                hive.getMSC().getTableColumnStatistics(
-                    tbl.getDbName(), tbl.getTableName(), Lists.newArrayList(colName),
-                    tableSnapshot != null ? tableSnapshot.getValidWriteIdList() : null)
-                .get(0).getStatsData();
+
+            ColumnStatisticsData statData = stats.get(0).getStatsData();
             String name = colDesc.getTypeString().toUpperCase();
+
             switch (type) {
             case Integer: {
               LongSubType subType = LongSubType.valueOf(name);
@@ -908,11 +915,6 @@ private ColumnStatisticsData validateSingleColStat(List<ColumnStatisticsObj> stats) {
       Hive hive, Table tbl, String colName, Set<Partition> parts) throws TException, LockException {
     List<String> partNames = new ArrayList<String>(parts.size());
     for (Partition part : parts) {
-      if (!StatsUtils.areColumnStatsUptoDateForQueryAnswering(part.getTable(), part.getParameters(), colName)) {
-        Logger.debug("Stats for part : " + part.getSpec() + " column " + colName
-            + " are not up to date.");
-        return null;
-      }
       partNames.add(part.getName());
     }
     AcidUtils.TableSnapshot tableSnapshot =
@@ -925,6 +927,14 @@
       Logger.debug("Received " + result.size() + " stats for " + parts.size() + " partitions");
       return null;
     }
+    for (List<ColumnStatisticsObj> stats : result.values()) {
+      for (ColumnStatisticsObj stat : stats) {
+        if (!StatsUtils.areColumnStatsUptoDateForQueryAnswering(tbl, stat)) {
+          Logger.debug("Stats for column " + colName + " are not up to date.");
+          return null;
+        }
+      }
+    }
     return result.values();
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 37e6d4c85013b88c4961c28ed57566276f5c936a..597b13e729af849a6cfcd9ce9658a6698b314642 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -557,6 +557,7 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats) {
       // no need to make a metastore call
       rowCount = 0;
       hiveColStats = new ArrayList<ColStatistics>();
+      // Note: we don't set isAccurate here; no metastore objects to check.
       for (int i = 0; i < nonPartColNamesThatRqrStats.size(); i++) {
         // add empty stats object for each column
         hiveColStats.add(
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java
index f7712e6c331a3740618ac30e104d08860d53d112..87761848ff352af83a17970eb92231684515c269 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java
@@ -297,7 +297,7 @@ private ColStatistics extractColStats(RexInputRef ref) {
         ColStatistics colStats = table.getColStat(Lists.newArrayList(columnOrigin.getOriginColumnOrdinal()), false).get(0);
         if (colStats != null && StatsUtils.areColumnStatsUptoDateForQueryAnswering(
-            table.getHiveTableMD(), table.getHiveTableMD().getParameters(), colStats.getColumnName())) {
+            table.getHiveTableMD(), colStats.isAccurate())) {
           return colStats;
         }
       }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 01179c805ffd3d0480a4d9de871c04f90a08a8b7..5490381217bade53aa66e3ca243a50d143d392d0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -1423,6 +1423,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           cs.setCountDistint(stats.getNumRows());
           cs.setNumNulls(0);
           cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), colType));
+          cs.setIsAccurate(false);
           aggColStats.add(cs);
         }
       }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 2b9f7636924034d787f3b756465211b8d126c7a3..eafb5e3a379e2db8df482e02bc0fbbbdb70816bd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -3557,8 +3557,7 @@ private void analyzeAlterTableAddParts(String[] qualified, CommonTree ast, boolean expectView)
         if (desc.getPartParams() == null) {
           desc.setPartParams(new HashMap<String, String>());
         }
-        StatsSetupConst.setStatsStateForCreateTable(desc.getPartParams(),
-            MetaStoreUtils.getColumnNames(tab.getCols()), StatsSetupConst.TRUE);
+        StatsSetupConst.setStatsStateForCreateTable(desc.getPartParams());
       }
     }
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
index eb594f825d44179b92e1c7e5d350623b868634ed..6dfd290a394ff63d201e4e97897457409278b80e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
@@ -253,7 +253,7 @@ public static boolean prepareImport(boolean isImportCmd,
     boolean inReplicationScope = false;
     if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){
       tblDesc.setReplicationSpec(replicationSpec);
-      StatsSetupConst.setBasicStatsState(tblDesc.getTblProps(), StatsSetupConst.FALSE);
+      tblDesc.setAreStatsAccurate(false);
       inReplicationScope = true;
     }
 
@@ -278,8 +278,8 @@ public static boolean prepareImport(boolean isImportCmd,
       for (Partition partition : partitions) {
        // TODO: this should ideally not create AddPartitionDesc per partition
        AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
-        if (inReplicationScope){
-          StatsSetupConst.setBasicStatsState(partsDesc.getPartition(0).getPartParams(), StatsSetupConst.FALSE);
+        if (inReplicationScope) {
+          partsDesc.setBasicStatsAccurate(false);
        }
        partitionDescs.add(partsDesc);
      }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 474c793ec3961dddab866fe5f185a26233b082cf..39ec50f16c463804d05c64cc70ae9bf24ccaf618 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -205,6 +205,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.UpdateStatsDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc;
 import org.apache.hadoop.hive.ql.plan.ForwardDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
@@ -6928,17 +6929,10 @@ private void genPartnCols(String dest, Operator input, QB qb,
     }
   }
 
-  @SuppressWarnings("unchecked")
   private void setStatsForNonNativeTable(String dbName, String tableName) throws SemanticException {
-    String qTableName = DDLSemanticAnalyzer.getDotName(new String[] { dbName,
-        tableName });
-    AlterTableDesc alterTblDesc = new AlterTableDesc(AlterTableTypes.DROPPROPS, null, false);
-    HashMap<String, String> mapProp = new HashMap<>();
-    mapProp.put(StatsSetupConst.COLUMN_STATS_ACCURATE, null);
-    alterTblDesc.setOldName(qTableName);
-    alterTblDesc.setProps(mapProp);
-    alterTblDesc.setDropIfExists(true);
-    this.rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc)));
+    UpdateStatsDesc desc = new UpdateStatsDesc(
+        null, dbName, tableName, null, UpdateStatsDesc.OpType.INVALIDATE_ALL);
+    this.rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AddPartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AddPartitionDesc.java
index d3a87f97726689da6a49ddc988923d06682bb1d0..b6f7fcda91d099ce1526640f28c689f1eaced61e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/AddPartitionDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/AddPartitionDesc.java
@@ -155,6 +155,7 @@ public void setOutputFormat(String outputFormat) {
   List<OnePartitionDesc> partitions = null;
   boolean replaceMode = false;
   private ReplicationSpec replicationSpec = null;
+  private Boolean areBasicStatsAccurate = null;
 
   /**
@@ -324,4 +325,13 @@ public ReplicationSpec getReplicationSpec(){
     }
     return this.replicationSpec;
   }
+
+  public void setBasicStatsAccurate(boolean b) {
+    this.areBasicStatsAccurate = b;
+  }
+
+  public Boolean areBasicStatsAccurate() {
+    return areBasicStatsAccurate;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index a31f965a5fbe123f4cfec0ba8fe1171796ab7b5c..f6233e8fbd854d2aa221f11122fcdd8cd3081703 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -30,6 +30,7 @@
   private Range range;
   private boolean isPrimaryKey;
   private boolean isEstimated;
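+  /** Whether these stats are up to date; replaces the params-based COLUMN_STATS_ACCURATE check. */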
+  private boolean isAccurate;
 
   public ColStatistics(String colName, String colType) {
     this.setColumnName(colName);
@@ -152,9 +153,18 @@ public ColStatistics clone() {
     if (range != null) {
       clone.setRange(range.clone());
     }
+    clone.setIsAccurate(isAccurate);
     return clone;
   }
 
+  public void setIsAccurate(boolean isAccurate) {
+    this.isAccurate = isAccurate;
+  }
+
+  public boolean isAccurate() {
+    return isAccurate;
+  }
+
   public boolean isPrimaryKey() {
     return isPrimaryKey;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
index 0fadf1b61f0cf086e34aadff78ff834fc65484f3..bf4b3b86e7c44e62763b5826d6616d360dc3e8b0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
@@ -109,6 +109,7 @@
   // The FSOP configuration for the FSOP that is going to write initial data during ctas.
   // This is not needed beyond compilation, so it is transient.
   private transient FileSinkDesc writer;
+  private Boolean areStatsAccurate = null;
 
   public CreateTableDesc() {
   }
@@ -873,12 +874,13 @@ public Table toTable(HiveConf conf) throws HiveException {
     if (!this.isCTAS && (tbl.getPath() == null || (tbl.isEmpty() && !isExternal()))) {
       if (!tbl.isPartitioned() &&
           conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
-        StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters(),
-            MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
+        StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters());
+        tbl.getTTable().setIsStatsCompliant(true);
       }
+    } else if (areStatsAccurate != null) {
+      tbl.getTTable().setIsStatsCompliant(areStatsAccurate);
     } else {
-      StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters(), null,
-          StatsSetupConst.FALSE);
+      tbl.getTTable().setIsStatsCompliant(false);
     }
     return tbl;
   }
@@ -891,7 +893,7 @@ public Long getInitialMmWriteId() {
     return initialMmWriteId;
   }
-  
+
   public FileSinkDesc getAndUnsetWriter() {
     FileSinkDesc fsd = writer;
@@ -902,4 +904,8 @@ public FileSinkDesc getAndUnsetWriter() {
   public void setWriter(FileSinkDesc writer) {
     this.writer = writer;
   }
+
+  public void setAreStatsAccurate(boolean b) {
+    this.areStatsAccurate = b;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
index f0f7b18d192f85b489ccde4e8a80e92dc11a0494..e70245d118361c6e535aecb70930dd08027d9e01 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateViewDesc.java
@@ -68,6 +68,7 @@
   private Map<String, String> serdeProps; // only used for materialized views
   private Set<String> tablesUsed; // only used for materialized views
   private ReplicationSpec replicationSpec = null;
+  private Boolean areStatsAccurate = null;
 
   /**
    * For serialization only.
@@ -365,6 +366,8 @@ public Table toTable(HiveConf conf) throws HiveException {
       tbl.setOutputFormatClass(getOutputFormat());
     }
 
+    if (areStatsAccurate != null) {
+      tbl.getTTable().setIsStatsCompliant(areStatsAccurate);
+    }
+
     if (isMaterialized()) {
       if (getLocation() != null) {
         tbl.setDataLocation(new Path(getLocation()));
@@ -412,4 +415,8 @@ public Table toTable(HiveConf conf) throws HiveException {
     return tbl;
   }
+
+  public void setAreStatsAccurate(boolean b) {
+    this.areStatsAccurate = b;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
index 8ed3b03a84656b91ce19e12c42163f258b9c74d2..c3bba5078a30be58880571590c1ba735c8edda28 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
@@ -113,6 +113,8 @@
   private AlterTablePartMergeFilesDesc mergeFilesDesc;
   private CacheMetadataDesc cacheMetadataDesc;
 
+  private UpdateStatsDesc updateStatsDesc;
+
   public DDLWork() {
   }
@@ -415,6 +417,11 @@ public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs,
     this.addPartitionDesc = addPartitionDesc;
   }
 
+  public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs, UpdateStatsDesc desc) {
+    this(inputs, outputs);
+    this.updateStatsDesc = desc;
+  }
+
   /**
    * @param renamePartitionDesc
    *          information about the partitions we want to add.
   */
@@ -1356,4 +1363,8 @@ public CreateOrDropTriggerToPoolMappingDesc getTriggerToPoolMappingDesc() {
   public void setTriggerToPoolMappingDesc(CreateOrDropTriggerToPoolMappingDesc triggerToPoolMappingDesc) {
     this.triggerToPoolMappingDesc = triggerToPoolMappingDesc;
   }
+
+  public UpdateStatsDesc getUpdateStatsDesc() {
+    return updateStatsDesc;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java
index ef7325fe2c0f12eeb047a570e8006cee6937e5af..842d768bf442a0a3091aeb0b50429e62ce92693c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ImportTableDesc.java
@@ -359,4 +359,15 @@ public Table toTable(HiveConf conf) throws Exception {
       return null;
     }
   }
+
+  public void setAreStatsAccurate(boolean b) {
+    switch (getDescType()) {
+    case TABLE:
+      createTblDesc.setAreStatsAccurate(b);
+      break;
+    case VIEW:
+      createViewDesc.setAreStatsAccurate(b);
+      break;
+    default:
+      throw new AssertionError(getDescType());
+    }
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/UpdateStatsDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/UpdateStatsDesc.java
new file mode 100644
index 0000000000000000000000000000000000000000..824460d80298587973aa4d427b6f5d3b62616da1
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/UpdateStatsDesc.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.plan;
+
+import org.apache.hadoop.hive.ql.plan.DDLDesc.DDLDescWithWriteId;
+
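+/**
+ * Describes a pure stats-update DDL operation, e.g. invalidating all stats for a table
+ * or partition; replaces the old ALTER TABLE drop of the COLUMN_STATS_ACCURATE property.
+ */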
+public class UpdateStatsDesc extends DDLDesc implements DDLDescWithWriteId {
+  private static final long serialVersionUID = 1L;
+
+  public enum OpType {
+    INVALIDATE_ALL, // TODO: add as needed - set stats, and such
+  }
+
+  private final String catName, dbName, tableName, partName;
+  private final OpType opType;
+  private long writeId;
+
+  public UpdateStatsDesc(String catName, String dbName, String tableName,
+      String partName, OpType opType) {
+    super();
+    this.catName = catName;
+    this.dbName = dbName;
+    this.tableName = tableName;
+    this.partName = partName;
+    this.opType = opType;
+  }
+
+  @Override
+  public void setWriteId(long writeId) {
+    this.writeId = writeId;
+  }
+
+  @Override
+  public String getFullTableName() {
+    return catName + "." + dbName + "." + tableName;
+  }
+
+  @Override
+  public boolean mayNeedWriteId() {
+    return true;
+  }
+
+  public String getCatName() {
+    return catName;
+  }
+
+  public String getDbName() {
+    return dbName;
+  }
+
+  public String getTableName() {
+    return tableName;
+  }
+
+  public String getPartName() {
+    return partName;
+  }
+
+  public OpType getOpType() {
+    return opType;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java
index 53b3065a88c84c1a4cdbc39aa6d29a42aa81f0b8..222a9c1a63b669648aa627e1c63a0dd96e531b72 100644
--- ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java
@@ -19,7 +19,9 @@
 package org.apache.hadoop.hive.ql.stats;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -128,7 +130,8 @@ public String getName() {
   static class FooterStatCollector implements Runnable {
 
     private Partish partish;
-    private Object result;
+    private Map<String, String> newStats;
+    private boolean areStatsValid, isPartition;
     private JobConf jc;
     private Path dir;
     private FileSystem fs;
@@ -146,15 +149,10 @@ public String apply(FooterStatCollector sc) {
         return String.format("%s#%s", sc.partish.getTable().getCompleteName(), sc.partish.getPartishType());
       }
     };
-    private static final Function<FooterStatCollector, Partition> EXTRACT_RESULT_FUNCTION =
-        new Function<FooterStatCollector, Partition>() {
-      @Override
-      public Partition apply(FooterStatCollector input) {
-        return (Partition) input.result;
-      }
-    };
+
     private boolean isValid() {
-      return result != null;
+      return newStats != null;
     }
 
     public void init(HiveConf conf, LogHelper console) throws IOException {
@@ -166,7 +164,7 @@ public void init(HiveConf conf, LogHelper console) throws IOException {
 
     @Override
    public void run() {
-      Map<String, String> parameters = partish.getPartParameters();
+      Map<String, String> stats = new HashMap<>();
       try {
         long numRows = 0;
         long rawDataSize = 0;
@@ -213,21 +211,16 @@ public void run() {
           }
         }
 
-        StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
-
-        parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows));
-        parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
-        parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize));
-        parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
-        parameters.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, String.valueOf(numErasureCodedFiles));
+        stats.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows));
+        stats.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
+        stats.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize));
+        stats.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
+        stats.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, String.valueOf(numErasureCodedFiles));
+        newStats = stats;
+        areStatsValid = true;
+        isPartition = partish.getPartition() != null;
 
-        if (partish.getPartition() != null) {
-          result = new Partition(partish.getTable(), partish.getPartition().getTPartition());
-        } else {
-          result = new Table(partish.getTable().getTTable());
-        }
-
-        String msg = partish.getSimpleName() + " stats: [" + toString(parameters) + ']';
+        String msg = partish.getSimpleName() + " stats: [" + toString(stats) + ']';
         LOG.debug(msg);
         console.printInfo(msg);
@@ -321,7 +314,7 @@ private int updatePartitions(Hive db, List<FooterStatCollector> scs, Table table) {
     }
     if (work.isStatsReliable()) {
       for (FooterStatCollector statsCollection : scs) {
-        if (statsCollection.result == null) {
+        if (statsCollection.newStats == null) {
          LOG.debug("Stats requested to be reliable. Empty stats found: {}", statsCollection.partish.getSimpleName());
          return -1;
        }
      }
    }
@@ -357,17 +350,24 @@ private int updatePartitions(Hive db, List<FooterStatCollector> scs, Table table) {
        throw new RuntimeException("very intresting");
      }
 
-      if (values.get(0).result instanceof Table) {
-        db.alterTable(tableFullName, (Table) values.get(0).result, environmentContext, true);
+      FooterStatCollector f0 = values.get(0);
+      Table t = f0.partish.getTable();
+      if (!f0.isPartition) {
+        db.updateBasicTableStats(
+            t.getCatName(), t.getDbName(), t.getTableName(), f0.areStatsValid, f0.newStats);
         LOG.debug("Updated stats for {}.", tableFullName);
       } else {
-        if (values.get(0).result instanceof Partition) {
-          List<Partition> results = Lists.transform(values, FooterStatCollector.EXTRACT_RESULT_FUNCTION);
-          db.alterPartitions(tableFullName, results, environmentContext, true);
-          LOG.debug("Bulk updated {} partitions of {}.", results.size(), tableFullName);
-        } else {
-          throw new RuntimeException("inconsistent");
+        List<String> partNames = new ArrayList<>(values.size());
+        List<Map<String, String>> partStats = new ArrayList<>(values.size());
+        boolean isAccurate = true; // Currently we only send true flag from here. Improve?
+        for (FooterStatCollector fc : values) {
+          partNames.add(fc.partish.getPartition().getName());
+          partStats.add(fc.newStats);
+          isAccurate = isAccurate && fc.areStatsValid;
         }
+        db.updateBasicPartitionsStats(
+            t.getCatName(), t.getDbName(), t.getTableName(), isAccurate, partNames, partStats);
+        LOG.debug("Bulk updated {} partitions of {}.", partStats.size(), tableFullName);
       }
     }
     LOG.debug("Updated stats for: {}", tableFullName);
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
index b9b4a442b74233d53cafdb970f04957c21caaa11..039f13c7093297bb7c1fd50cbf1dd74b3e2f9a51 100644
--- ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
@@ -111,12 +111,13 @@ public String getName() {
   }
 
   private static class BasicStatsProcessor {
-
     private Partish partish;
     private List<FileStatus> partfileStatus;
+    private Map<String, String> newStats;
     private boolean isMissingAcidState = false;
     private BasicStatsWork work;
     private boolean followedColStats1;
+    private boolean doSetBasicToAccurate, doInvalidateColStats;
 
     public BasicStatsProcessor(Partish partish, BasicStatsWork work, HiveConf conf, boolean followedColStats2) {
       this.partish = partish;
@@ -124,20 +125,16 @@ public BasicStatsProcessor(Partish partish, BasicStatsWork work, HiveConf conf,
       followedColStats1 = followedColStats2;
     }
 
-    public Object process(StatsAggregator statsAggregator) throws HiveException, MetaException {
-      Partish p = partish;
-      Map<String, String> parameters = p.getPartParameters();
-      if (work.isTargetRewritten()) {
-        StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
-      }
+    public boolean process(StatsAggregator statsAggregator) throws HiveException, MetaException {
+      newStats = StatsSetupConst.extractStats(partish.getPartParameters());
+      doSetBasicToAccurate = work.isTargetRewritten();
 
       // work.getTableSpecs() == null means it is not analyze command
       // and then if it is not followed by column stats, we should clean
       // column stats
       // FIXME: move this to ColStat related part
-      if (!work.isExplicitAnalyze() && !followedColStats1) {
-        StatsSetupConst.clearColumnStatsState(parameters);
-      }
+      doInvalidateColStats = !work.isExplicitAnalyze() && !followedColStats1;
 
       if (partfileStatus == null) {
         // This may happen if ACID state is absent from config.
@@ -145,32 +142,31 @@ public Object process(StatsAggregator statsAggregator) throws HiveException, MetaException {
             : partish.getPartition().getSpec().toString();
         LOG.warn("Partition/partfiles is null for: " + spec);
         if (isMissingAcidState) {
-          MetaStoreUtils.clearQuickStats(parameters);
-          return p.getOutput();
+          MetaStoreUtils.clearQuickStats(newStats);
+          return true;
         }
-        return null;
+        return false;
       }
 
       // The collectable stats for the aggregator needs to be cleared.
       // For eg. if a file is being loaded, the old number of rows are not valid
       // XXX: makes no sense for me... possibly not needed anymore
-      if (work.isClearAggregatorStats()) {
-        // we choose to keep the invalid stats and only change the setting.
-        StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
-      }
+      // we choose to keep the invalid stats and only change the setting.
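+      // Clearing the aggregator stats means the collected values are stale, so the
+      // accuracy flag is dropped even when the target was rewritten.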
+ doSetBasicToAccurate = doSetBasicToAccurate && work.isClearAggregatorStats(); - MetaStoreUtils.populateQuickStats(partfileStatus, parameters); + // TODO: remove this crap + MetaStoreUtils.populateQuickStats(partfileStatus, newStats); if (statsAggregator != null) { // Update stats for transactional tables (MM, or full ACID with overwrite), even // though we are marking stats as not being accurate. - if (StatsSetupConst.areBasicStatsUptoDate(parameters) || p.isTransactionalTable()) { - String prefix = getAggregationPrefix(p.getTable(), p.getPartition()); - updateStats(statsAggregator, parameters, prefix); + if (doSetBasicToAccurate || partish.isTransactionalTable()) { + String prefix = getAggregationPrefix(partish.getTable(), partish.getPartition()); + updateStats(statsAggregator, newStats, prefix); } } - return p.getOutput(); + return true; } public void collectFileStatus(Warehouse wh, HiveConf conf) throws MetaException, IOException { @@ -259,11 +255,14 @@ private int aggregateStats(Hive db) { BasicStatsProcessor basicStatsProcessor = new BasicStatsProcessor(p, work, conf, followedColStats); basicStatsProcessor.collectFileStatus(wh, conf); - Table res = (Table) basicStatsProcessor.process(statsAggregator); - if (res == null) { + if (!basicStatsProcessor.process(statsAggregator)) { return 0; } - db.alterTable(tableFullName, res, environmentContext, true); + if (basicStatsProcessor.doInvalidateColStats) { + db.invalidateStats(table.getCatName(), table.getDbName(), table.getTableName(), null, false, true); + } + db.updateBasicTableStats(table.getCatName(), table.getDbName(), table.getTableName(), + basicStatsProcessor.doSetBasicToAccurate, basicStatsProcessor.newStats); if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) { console.printInfo("Table " + tableFullName + " stats: [" + toString(p.getPartParameters()) + ']'); diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java index 31c96826b02383fa26b90e67ca5f1257c495e6ba..705886d2040fc5fca4255982706b461ee561be2a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java @@ -59,6 +59,7 @@ public static ColumnStatisticsObj readHiveStruct(String columnName, String colum ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); statsObj.setColName(columnName); statsObj.setColType(columnType); + statsObj.setIsStatsCompliant(true); try { unpackStructObject(foi, f, fieldName, statsObj); return statsObj; diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUpdaterThread.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUpdaterThread.java index a50ec18b8aee0e24b632147f02c43526e0fe3071..14f2f39f4e01ccabf654ad3b857a56aff03ea1d1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUpdaterThread.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUpdaterThread.java @@ -19,10 +19,13 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; @@ -31,7 +34,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.conf.Configuration; -import 
org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.common.ValidReaderWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; @@ -235,19 +237,34 @@ private void stopWorkers() { } Collections.sort(allCols); if (table.getPartitionKeysSize() == 0) { - Map params = table.getParameters(); - List colsToUpdate = null; long writeId = isTxn ? table.getWriteId() : -1; + boolean areBasicStatsValid = table.isSetIsStatsCompliant() && table.isIsStatsCompliant(); + if (isTxn) { + areBasicStatsValid = ObjectStore.isCurrentStatsValidForTheQuery( + conf, areBasicStatsValid, writeId, writeIdString, false); + } + + ColumnStatistics existingStats = null; + try { + // Note: this should NOT do txn verification - we want to get outdated stats, to + // see if we need to update anything. + existingStats = rs.getTableColumnStatistics(cat, db, tbl, allCols); + } catch (NoSuchObjectException e) { + LOG.error("Cannot retrieve existing stats, skipping " + fullTableName, e); + return null; + } + Collection colsToUpdate = null; if (isExistingOnly) { - // Get the existing stats, including the txn state if any, to see if we need to update. - colsToUpdate = getExistingNonPartTableStatsToUpdate( - fullTableName, cat, db, tbl, params, writeId, allCols, writeIdString); + colsToUpdate = getExistingStatsToUpdate( + existingStats, writeIdString, writeId, areBasicStatsValid); } else { - colsToUpdate = getAnyStatsToUpdate(db, tbl, allCols, params, writeId, writeIdString); + colsToUpdate = getAnyStatsToUpdate( + existingStats, db, tbl, allCols, areBasicStatsValid, writeId, writeIdString); } - LOG.debug("Columns to update are {}; existing only: {}, out of: {} based on {}", - colsToUpdate, isExistingOnly, allCols, params); + + LOG.debug("Columns to update are {}; existing only: {}, out of: {}", + colsToUpdate, isExistingOnly, allCols); if (colsToUpdate == null || colsToUpdate.isEmpty()) { return null; // No update necessary. @@ -284,15 +301,15 @@ private void stopWorkers() { // TODO: ideally when col-stats-accurate stuff is stored in some sane structure, this should // retrieve partsToUpdate in a single query; no checking partition params in java. List partNames = null; - Map> colsPerPartition = null; + Map> colsPerPartitionAcc = null, colsPerPartitionInacc = null; boolean isAllParts = true; if (isExistingOnly) { // Make sure the number of partitions we get, and the number of stats objects, is consistent. 
rs.openTransaction(); boolean isOk = false; try { - colsPerPartition = rs.getPartitionColsWithStats(cat, db, tbl); - partNames = Lists.newArrayList(colsPerPartition.keySet()); + colsPerPartitionInacc = rs.getPartitionColsWithAccurateStats(cat, db, tbl, writeIdString, false); + partNames = Lists.newArrayList(colsPerPartitionInacc.keySet()); int partitionCount = rs.getNumPartitionsByFilter(cat, db, tbl, ""); isAllParts = partitionCount == partNames.size(); isOk = true; @@ -304,10 +321,12 @@ private void stopWorkers() { } } } else { + colsPerPartitionAcc = rs.getPartitionColsWithAccurateStats(cat, db, tbl, writeIdString, true); partNames = rs.listPartitionNames(cat, db, tbl, (short) -1); isAllParts = true; } Table t = rs.getTable(cat, db, tbl); + boolean isTxn = AcidUtils.isTransactionalTable(t); List currentBatch = null; int nextBatchStart = 0, nextIxInBatch = -1, currentBatchStart = 0; List colsToUpdateForAll = null; @@ -329,6 +348,7 @@ private void stopWorkers() { } nextIxInBatch = 0; } + int currentIxInBatch = nextIxInBatch++; Partition part = currentBatch.get(currentIxInBatch); String partName = Warehouse.makePartName(t.getPartitionKeys(), part.getValues()); @@ -347,17 +367,37 @@ private void stopWorkers() { isAllParts = false; continue; } + boolean areBasicStatsAccurate = part.isSetIsStatsCompliant() && part.isIsStatsCompliant(); + if (isTxn) { + areBasicStatsAccurate = ObjectStore.isCurrentStatsValidForTheQuery( + conf, areBasicStatsAccurate, part.getWriteId(), writeIdString, false); + } // Find which columns we need to update for this partition, if any. - List colsToMaybeUpdate = allCols; - if (isExistingOnly) { - colsToMaybeUpdate = colsPerPartition.get(partName); - Collections.sort(colsToMaybeUpdate); + List colsToUpdate = allCols; + if (areBasicStatsAccurate) { + if (isExistingOnly) { + colsToUpdate = colsPerPartitionInacc.get(partName); + } else { + List colsAccurate = colsPerPartitionAcc.get(partName); + if (colsAccurate != null) { + if (colsAccurate.size() >= allCols.size()) { + continue; // No need to update anything for this partition. + } else if (!colsAccurate.isEmpty()) { + // A subset of columns is accurate, which should be rare + // (or we just got turned on after some partial analyze). + colsToUpdate = new ArrayList<>(colsToUpdate.size() - colsAccurate.size()); + Set colsAccurateSet = new HashSet<>(colsAccurate); + for (String col : allCols) { + if (colsAccurateSet.contains(col)) continue; + colsToUpdate.add(col); + } + } + } + } } - List colsToUpdate = getAnyStatsToUpdate(db, tbl, colsToMaybeUpdate, params, - writeIdString == null ? -1 : part.getWriteId(), writeIdString); - LOG.debug("Updating {} based on {} and {}", colsToUpdate, colsToMaybeUpdate, params); + LOG.debug("Updating {}", colsToUpdate); if (colsToUpdate == null || colsToUpdate.isEmpty()) { if (isAllParts) { @@ -433,55 +473,51 @@ private String buildPartColStr(Table table) { return partColStr; } - private List getExistingNonPartTableStatsToUpdate(TableName fullTableName, - String cat, String db, String tbl, Map params, long statsWriteId, - List allCols, String writeIdString) throws MetaException { - ColumnStatistics existingStats = null; - try { - // Note: this should NOT do txn verification - we want to get outdated stats, to - // see if we need to update anything. 
- existingStats = rs.getTableColumnStatistics(cat, db, tbl, allCols); - } catch (NoSuchObjectException e) { - LOG.error("Cannot retrieve existing stats, skipping " + fullTableName, e); - return null; - } - // TODO: we should probably skip updating if writeId is from an active txn - boolean isTxnValid = (writeIdString == null) || ObjectStore.isCurrentStatsValidForTheQuery( - conf, params, statsWriteId , writeIdString, false); - return getExistingStatsToUpdate(existingStats, params, isTxnValid); - } - - private List getExistingStatsToUpdate( - ColumnStatistics existingStats, Map params, boolean isTxnValid) { - boolean hasAnyAccurate = isTxnValid && StatsSetupConst.areBasicStatsUptoDate(params); + private List getExistingStatsToUpdate(ColumnStatistics existingStats, + String writeIdString, long statsWriteId, boolean isBasicValid) throws MetaException { List colsToUpdate = new ArrayList<>(); for (ColumnStatisticsObj obj : existingStats.getStatsObj()) { String col = obj.getColName(); - if (!hasAnyAccurate || !StatsSetupConst.areColumnStatsUptoDate(params, col)) { + if (!isBasicValid) { + colsToUpdate.add(col); + continue; + } + // Unlike other paths, here we only pass in writeIdString == null for non-txn tables. + boolean isColValid = obj.isSetIsStatsCompliant() && obj.isIsStatsCompliant(); + if (writeIdString != null) { + isColValid = ObjectStore.isCurrentStatsValidForTheQuery( + conf, isColValid, obj.getWriteId(), writeIdString, false); + } + // Only stale columns need a re-analyze; valid ones can be skipped. + if (!isColValid) { + colsToUpdate.add(col); + } } return colsToUpdate; } - private List getAnyStatsToUpdate(String db, String tbl, List allCols, - Map params, long statsWriteId, String writeIdString) throws MetaException { + private List getAnyStatsToUpdate(ColumnStatistics existingStats, String db, String tbl, + List allCols, boolean areBasicStatsValid, long statsWriteId, String writeIdString) + throws MetaException { // Note: we only run "for columns" command and assume no basic stats means no col stats. - if (!StatsSetupConst.areBasicStatsUptoDate(params)) { + if (!areBasicStatsValid) { return allCols; } // TODO: we should probably skip updating if writeId is from an active txn if (writeIdString != null && !ObjectStore.isCurrentStatsValidForTheQuery( - conf, params, statsWriteId, writeIdString, false)) { + conf, areBasicStatsValid, statsWriteId, writeIdString, false)) { return allCols; } - List colsToUpdate = new ArrayList<>(); - for (String col : allCols) { - if (!StatsSetupConst.areColumnStatsUptoDate(params, col)) { - colsToUpdate.add(col); + HashSet colSet = null; + for (ColumnStatisticsObj obj : existingStats.getStatsObj()) { + if (ObjectStore.isCurrentStatsValidForTheQuery(conf, + obj.isSetIsStatsCompliant() && obj.isIsStatsCompliant(), statsWriteId, writeIdString, false)) { + if (colSet == null) { + colSet = new HashSet<>(allCols); + } + if (!colSet.remove(obj.getColName())) { + LOG.warn("Column " + obj.getColName() + " was not found in the set"); + } } } - return colsToUpdate; + + return colSet == null ?
allCols : new ArrayList<>(colSet); } private List getTablesToCheck() throws MetaException, NoSuchObjectException { @@ -559,9 +595,9 @@ private static String makeFullPartName(TableName tableName, String partName) { private final static class AnalyzeWork { TableName tableName; String partName, allParts; - List cols; + Collection cols; - public AnalyzeWork(TableName tableName, String partName, String allParts, List cols) { + public AnalyzeWork(TableName tableName, String partName, String allParts, Collection cols) { this.tableName = tableName; this.partName = partName; this.allParts = allParts; diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index ae51b4db5141a22762479ec5c7a0d25eee9917fe..b70c71d19be459fd255a9253684fadabd24bb2ed 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -128,7 +128,6 @@ /** * Collect table, partition and column level statistics - * Note: DOES NOT CHECK txn stats. * @param conf * - hive configuration * @param partList @@ -227,7 +226,6 @@ private static void estimateStatsForMissingCols(List neededColumns, List } } - /** Note: DOES NOT CHECK txn stats. */ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, Table table, List schema, List neededColumns, ColumnStatsList colStatsCache, List referencedColumns, boolean fetchColStats) @@ -266,10 +264,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p long numErasureCodedFiles = getErasureCodedFiles(table); if (fetchColStats) { - // Note: this is currently called from two notable places (w/false for checkTxn) - // 1) StatsRulesProcFactory.TableScanStatsRule via collectStatistics - // 2) RelOptHiveTable via getColStats and updateColStats. - colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache, false); + colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache); if(colStats == null) { colStats = Lists.newArrayList(); } @@ -389,11 +384,8 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p // size is 0, aggrStats is null after several retries. Thus, we can // skip the step to connect to the metastore. if (neededColsToRetrieve.size() > 0 && partNames.size() > 0) { - // Note: this is currently called from two notable places (w/false for checkTxn) - // 1) StatsRulesProcFactory.TableScanStatsRule via collectStatistics - // 2) RelOptHiveTable via getColStats and updateColStats. aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(), - neededColsToRetrieve, partNames, false); + neededColsToRetrieve, partNames, true); } boolean statsRetrieved = aggrStats != null && @@ -598,6 +590,7 @@ public static ColStatistics getColStatsForPartCol(ColumnInfo ci,PartitionIterabl ci.getObjectInspector(), partCS.getColumnType())); partCS.setRange(getRangePartitionColumn(partList, ci.getInternalName(), ci.getType().getTypeName(), conf.getVar(ConfVars.DEFAULTPARTITIONNAME))); + partCS.setIsAccurate(true); // We have all the values so that would be accurate. 
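
Note: with the accuracy bit now carried on ColStatistics instead of being parsed out of COLUMN_STATS_ACCURATE, planner-side consumers can gate on it per column. A short sketch; only the setters are visible in this diff, so the getter names below are assumed to mirror them:

    // Sketch: trust a column's statistics only when it is flagged accurate.
    for (ColStatistics cs : colStats) {
      if (!cs.isAccurate()) { // assumed getter matching setIsAccurate() in this diff
        continue; // fall back to estimates; estimateColStats() marks its output inaccurate
      }
      // ... safe to use cs.getNumNulls(), cs.getRange(), etc.
    }
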
return partCS; } @@ -843,6 +836,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab String colName) { String colTypeLowerCase = cso.getColType().toLowerCase(); ColStatistics cs = new ColStatistics(colName, colTypeLowerCase); + cs.setIsAccurate(cso.isSetIsStatsCompliant() && cso.isIsStatsCompliant()); ColumnStatisticsData csd = cso.getStatsData(); if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME) @@ -926,6 +920,7 @@ private static ColStatistics estimateColStats(long numRows, String colName, Hive ColumnInfo cinfo = getColumnInfoForColumn(colName, schema); ColStatistics cs = new ColStatistics(colName, cinfo.getTypeName()); cs.setIsEstimated(true); + cs.setIsAccurate(false); String colTypeLowerCase = cinfo.getTypeName().toLowerCase(); @@ -1004,7 +999,7 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){ */ public static List getTableColumnStats( Table table, List schema, List neededColumns, - ColumnStatsList colStatsCache, boolean checkTransactional) { + ColumnStatsList colStatsCache) { if (table.isMaterializedTable()) { LOG.debug("Materialized table does not contain table statistics"); return null; @@ -1033,7 +1028,7 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){ List stats = null; try { List colStat = Hive.get().getTableColumnStatistics( - dbName, tabName, colStatsToRetrieve, checkTransactional); + dbName, tabName, colStatsToRetrieve, true); stats = convertColStats(colStat, tabName); } catch (HiveException e) { LOG.error("Failed to retrieve table statistics: ", e); @@ -1507,7 +1502,9 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis long numNulls = 0; ObjectInspector oi = end.getWritableObjectInspector(); long numRows = parentStats.getNumRows(); - + // Expression stats derive their accuracy from the parent stats; default to inaccurate + // and set the flag to true only in the branches where the derivation is exact.
+ boolean isAccurate = false; if (end instanceof ExprNodeColumnDesc) { // column projection ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; @@ -1546,6 +1543,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis } else { countDistincts = 1; } + isAccurate = true; } else if (end instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end; colName = engfd.getName(); @@ -1565,6 +1563,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis return newStats; } } + isAccurate = false; // fallback to default countDistincts = getNDVFor(engfd, numRows, parentStats); @@ -1575,6 +1574,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis colName = Joiner.on(",").join(encd.getCols()); colType = serdeConstants.LIST_TYPE_NAME; countDistincts = numRows; + isAccurate = true; } else if (end instanceof ExprNodeFieldDesc) { // field within complex type @@ -1582,6 +1582,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis colName = enfd.getFieldName(); colType = enfd.getTypeString(); countDistincts = numRows; + isAccurate = true; } else { throw new IllegalArgumentException("not supported expr type " + end.getClass()); } @@ -1589,6 +1590,8 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis colType = colType.toLowerCase(); avgColSize = getAvgColLenOf(conf, oi, colType); ColStatistics colStats = new ColStatistics(colName, colType); + + colStats.setIsAccurate(isAccurate); colStats.setAvgColLen(avgColSize); colStats.setCountDistint(countDistincts); colStats.setNumNulls(numNulls); @@ -1922,18 +1925,28 @@ public static boolean areBasicStatsUptoDateForQueryAnswering(Table table, Map params, String colName) { + public static boolean areColumnStatsUptoDateForQueryAnswering(Table table, ColumnStatisticsObj cso) { + return areColumnStatsUptoDateForQueryAnswering(table, + cso.isSetIsStatsCompliant() && cso.isIsStatsCompliant()); + } + + /** + * Are the column stats for the table up-to-date for query planning. + * Can run additional checks compared to the version in StatsSetupConst. + */ + public static boolean areColumnStatsUptoDateForQueryAnswering(Table table, boolean isAccurate) { // HIVE-19332: external tables should not be considered to have up-to-date stats. 
if (MetaStoreUtils.isExternalTable(table.getTTable())) { return false; } - return StatsSetupConst.areColumnStatsUptoDate(params, colName); + + return isAccurate; } } diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveReduceExpressionsWithStatsRule.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveReduceExpressionsWithStatsRule.java index 183f1279adc8b6c66a9af5f7cc150390bc54bc2e..4cfdaf5e078c1faa9918ea018ff401196d487c8a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveReduceExpressionsWithStatsRule.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveReduceExpressionsWithStatsRule.java @@ -92,7 +92,7 @@ public void before() { builder = HiveRelFactories.HIVE_BUILDER.create(optCluster, schemaMock); - StatsSetupConst.setStatsStateForCreateTable(tableParams, Lists.newArrayList("_int"), StatsSetupConst.TRUE); + StatsSetupConst.setStatsStateForCreateTable(tableParams); tableParams.put(StatsSetupConst.ROW_COUNT, "3"); } diff --git ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUpdaterThread.java ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUpdaterThread.java index 55131f3c2bf54fdbd100cb2fa377a6740f973420..202fb4997c3f8c8dd7b783dfdfc9b011d27fb615 100644 --- ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUpdaterThread.java +++ ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUpdaterThread.java @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.stats; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import java.io.File; import java.util.ArrayList; @@ -28,23 +30,24 @@ import org.apache.curator.shaded.com.google.common.collect.Lists; import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; import org.apache.hadoop.hive.ql.DriverUtils; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.thrift.TException; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -198,7 +201,7 @@ public void testTxnTable() throws Exception { badWriteId = msClient.allocateTableWriteId(badTxnId, dbName, tblName); tbl = msClient.getTable(dbName, tblName); tbl.setWriteId(badWriteId); - StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE); + tbl.setIsStatsCompliant(false); msClient.alter_table(null, dbName, tblName, tbl, new EnvironmentContext(), initialWriteIds.toString()); // Stats should not be valid. 
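
Note: the read-back this test depends on can be spelled out explicitly. A minimal sketch, using the four-argument getTable overload that appears later in this test file; the variable names are illustrative:

    // Sketch: a reader whose snapshot excludes the bad writeId should now
    // see the table's stats reported as non-compliant.
    Table t = msClient.getTable(catName, dbName, tblName, validWriteIds);
    assertFalse(t.isIsStatsCompliant());
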
@@ -545,68 +548,69 @@ private void setPartitionSkipProperty( private void verifyAndUnsetColStats( String tblName, List cols, IMetaStoreClient msClient) throws Exception { Table tbl = msClient.getTable(ss.getCurrentDatabase(), tblName); - verifyAndUnsetColStatsVal(tbl.getParameters(), cols); - EnvironmentContext ec = new EnvironmentContext(); - // Make sure metastore doesn't mess with our bogus stats updates. - ec.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); - msClient.alter_table_with_environmentContext(tbl.getDbName(), tbl.getTableName(), tbl, ec); + verifyStats(cols, msClient, tbl, true, true, null); + long txnId = msClient.openTxn("test"); + long writeId = msClient.allocateTableWriteId(txnId, tbl.getDbName(), tbl.getTableName()); + msClient.invalidateAllColumnStats(tbl.getCatName(), tbl.getDbName(), tbl.getTableName(), null, writeId); + msClient.commitTxn(txnId); // Double-check. tbl = msClient.getTable(ss.getCurrentDatabase(), tblName); - for (String col : cols) { - assertFalse(StatsSetupConst.areColumnStatsUptoDate(tbl.getParameters(), col)); - } + verifyStats(cols, msClient, tbl, true, false, null); } - private void verifyAndUnsetColStatsVal(Map params, List cols) { - assertTrue(StatsSetupConst.areBasicStatsUptoDate(params)); - for (String col : cols) { - assertTrue(StatsSetupConst.areColumnStatsUptoDate(params, col)); + private void verifyStats(List cols, IMetaStoreClient msClient, Table tbl, + boolean basicState, boolean colState, String validWriteIds) throws NoSuchObjectException, MetaException, TException { + assertEquals(basicState, tbl.isIsStatsCompliant()); + List stats = msClient.getTableColumnStatistics( + tbl.getDbName(), tbl.getTableName(), cols, validWriteIds); + for (ColumnStatisticsObj obj : stats) { + assertEquals(colState, obj.isIsStatsCompliant()); } - StatsSetupConst.removeColumnStatsState(params, cols); - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); } - private void verifyAndUnsetColStats(String tblName, String partName, List cols, - IMetaStoreClient msClient) throws Exception { + private void verifyAndUnsetColStats( + String tblName, String partName, List cols, IMetaStoreClient msClient) throws Exception { Partition part = msClient.getPartition(ss.getCurrentDatabase(), tblName, partName); - verifyAndUnsetColStatsVal(part.getParameters(), cols); - EnvironmentContext ec = new EnvironmentContext(); - // Make sure metastore doesn't mess with our bogus stats updates. - ec.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); - msClient.alter_partition(part.getCatName(), part.getDbName(), tblName, part, ec); + verifyStats(cols, msClient, part, partName, true, true, null); + long txnId = msClient.openTxn("test"); + long writeId = msClient.allocateTableWriteId(txnId, part.getDbName(), part.getTableName()); + msClient.invalidateAllColumnStats( + part.getCatName(), part.getDbName(), part.getTableName(), partName, writeId); + msClient.commitTxn(txnId); // Double-check. 
part = msClient.getPartition(ss.getCurrentDatabase(), tblName, partName); - for (String col : cols) { - assertFalse(StatsSetupConst.areColumnStatsUptoDate(part.getParameters(), col)); + verifyStats(cols, msClient, part, partName, true, false, null); + } + + private void verifyStats(List cols, IMetaStoreClient msClient, Partition part, String partName, + boolean basicState, boolean colState, String validWriteIds) throws NoSuchObjectException, MetaException, TException { + assertEquals(basicState, part.isIsStatsCompliant()); + Map> statsMap = msClient.getPartitionColumnStatistics( + part.getCatName(), part.getDbName(), part.getTableName(), + Lists.newArrayList(partName), cols, validWriteIds); + List stats = statsMap.values().iterator().next(); + for (ColumnStatisticsObj obj : stats) { + assertEquals(colState, obj.isIsStatsCompliant()); } } + private void verifyStatsUpToDate(String tbl, List cols, IMetaStoreClient msClient, boolean isUpToDate) throws Exception { Table table = msClient.getTable(ss.getCurrentDatabase(), tbl); - verifyStatsUpToDate(table.getParameters(), cols, isUpToDate); + verifyStats(cols, msClient, table, isUpToDate, isUpToDate, null); } private void verifyStatsUpToDate(String tbl, List cols, IMetaStoreClient msClient, String validWriteIds, boolean isUpToDate) throws Exception { Table table = msClient.getTable(ss.getCurrentCatalog(), ss.getCurrentDatabase(), tbl, validWriteIds); - verifyStatsUpToDate(table.getParameters(), cols, isUpToDate); - } - - private void verifyStatsUpToDate(Map params, List cols, - boolean isUpToDate) { - if (isUpToDate) { - assertTrue(StatsSetupConst.areBasicStatsUptoDate(params)); - } - for (String col : cols) { - assertEquals(isUpToDate, StatsSetupConst.areColumnStatsUptoDate(params, col)); - } + verifyStats(cols, msClient, table, isUpToDate, isUpToDate, validWriteIds); } private void verifyStatsUpToDate(String tbl, String part, ArrayList cols, IMetaStoreClient msClient, boolean isUpToDate) throws Exception { Partition partition = msClient.getPartition(ss.getCurrentDatabase(), tbl, part); - verifyStatsUpToDate(partition.getParameters(), cols, isUpToDate); + verifyStats(cols, msClient, partition, part, isUpToDate, isUpToDate, null); } private void executeQuery(String query) throws HiveException { diff --git standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift index 8a4bdd8ed827572f5fd9d291c5454630d84284bd..c877cbaa09b9e85bfd2bc4f1b7e87f8314945586 100644 --- standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift +++ standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift @@ -567,7 +567,9 @@ union ColumnStatisticsData { struct ColumnStatisticsObj { 1: required string colName, 2: required string colType, -3: required ColumnStatisticsData statsData +3: required ColumnStatisticsData statsData, +4: optional bool isStatsCompliant, +5: optional i64 writeId } struct ColumnStatisticsDesc { @@ -581,15 +583,12 @@ struct ColumnStatisticsDesc { struct ColumnStatistics { 1: required ColumnStatisticsDesc statsDesc, -2: required list statsObj, -3: optional bool isStatsCompliant // Are the stats isolation-level-compliant with the - // the calling query? 
+2: required list statsObj } struct AggrStats { 1: required list colStats, 2: required i64 partsFound, // number of partitions for which stats were found -3: optional bool isStatsCompliant } struct SetPartitionsStatsRequest { @@ -603,6 +602,30 @@ struct SetPartitionsStatsResponse { 1: required bool result; } +struct SetBasicStatsRequest { +1: required ColumnStatisticsDesc desc, // Column... is a misnomer. Applies to any stats. +2: required bool isValid, +3: optional map legacyStats, // this may be normalized later +4: optional i64 writeId=-1, // writeId for the current query that updates the stats +5: optional string validWriteIdList // valid write id list for the table for which this struct is being sent +} + +struct SetBasicStatsResponse { +1: required bool result; +} + +struct InvalidateColumnStatsRequest { +1: optional string catName +2: required string dbName, +3: required string tableName, +4: optional string partName, +5: optional i64 writeId=-1 // writeId for the current query that updates the stats +} + +struct InvalidateColumnStatsResponse { +1: required bool result; +} + // schema of the table/query results etc. struct Schema { // column names, types, comments @@ -729,13 +752,11 @@ struct PartitionsByExprRequest { } struct TableStatsResult { - 1: required list tableStats, - 2: optional bool isStatsCompliant + 1: required list tableStats } struct PartitionsStatsResult { - 1: required map> partStats, - 2: optional bool isStatsCompliant + 1: required map> partStats } struct TableStatsRequest { @@ -2058,6 +2079,14 @@ service ThriftHiveMetastore extends fb303.FacebookService SetPartitionsStatsResponse update_partition_column_statistics_req(1:SetPartitionsStatsRequest req) throws (1:NoSuchObjectException o1, 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4) + SetBasicStatsResponse update_table_basic_statistics_req(1:SetBasicStatsRequest req) throws (1:NoSuchObjectException o1, + 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4) + SetBasicStatsResponse update_partition_basic_statistics_req(1:SetBasicStatsRequest req) throws (1:NoSuchObjectException o1, + 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4) + + InvalidateColumnStatsResponse invalidate_all_column_statistics_req(1:InvalidateColumnStatsRequest req) throws (1:NoSuchObjectException o1, + 2:InvalidObjectException o2, 3:MetaException o3, 4:InvalidInputException o4) + // get APIs return the column statistics corresponding to db_name, tbl_name, [part_name], col_name if // such statistics exists. 
If the required statistics doesn't exist, get APIs throw NoSuchObjectException diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 35be3c4d72789896c5b7fa05023d78911e94859b..10e6d8ad226080372bc5af3c222b485d4fb0b90d 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -17,38 +17,22 @@ */ package org.apache.hadoop.hive.common; -import java.io.IOException; +import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.TreeMap; -import com.google.common.collect.ImmutableList; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonSerializer; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectReader; -import com.fasterxml.jackson.databind.ObjectWriter; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; -import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.google.common.collect.ImmutableList; /** * A class that defines the constant strings used by the statistics implementation. */ - public class StatsSetupConst { protected static final Logger LOG = LoggerFactory.getLogger(StatsSetupConst.class.getName()); @@ -122,6 +106,7 @@ public String getAggregator(Configuration conf) { public static final List SUPPORTED_STATS = ImmutableList.of( NUM_FILES, ROW_COUNT, TOTAL_SIZE, RAW_DATA_SIZE, NUM_ERASURE_CODED_FILES); + public static final String COLUMN_STATS_ACCURATE_DEPRECATED = "COLUMN_STATS_ACCURATE"; /** * List of all statistics that need to be collected during query execution. These are * statistics that inherently require a scan of the data. @@ -147,14 +132,6 @@ public String getAggregator(Configuration conf) { // update should take place, such as with replication. public static final String DO_NOT_UPDATE_STATS = "DO_NOT_UPDATE_STATS"; - //This string constant will be persisted in metastore to indicate whether corresponding - //table or partition's statistics and table or partition's column statistics are accurate or not. - public static final String COLUMN_STATS_ACCURATE = "COLUMN_STATS_ACCURATE"; - - public static final String COLUMN_STATS = "COLUMN_STATS"; - - public static final String BASIC_STATS = "BASIC_STATS"; - public static final String CASCADE = "CASCADE"; public static final String TRUE = "true"; @@ -163,9 +140,28 @@ public String getAggregator(Configuration conf) { // The parameter keys for the table statistics. Those keys are excluded from 'show create table' command output. 
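
Note: a short usage sketch of the two helpers this file now carries (defined just below); the parameter map contents are illustrative:

    // Sketch: table parameters now hold only numeric stats - no accuracy JSON blob.
    Map<String, String> params = new HashMap<>();
    StatsSetupConst.setStatsStateForCreateTable(params); // seeds each SUPPORTED_STATS key with "0"
    params.put(StatsSetupConst.ROW_COUNT, "3");
    // extractStats() pulls out only the TABLE_PARAMS_STATS_KEYS entries.
    Map<String, String> stats = StatsSetupConst.extractStats(params);
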
public static final List TABLE_PARAMS_STATS_KEYS = ImmutableList.of( - COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS, - NUM_ERASURE_CODED_FILES); + NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS, NUM_ERASURE_CODED_FILES); + + public static void setStatsStateForCreateTable(Map params) { + for (String stat : StatsSetupConst.SUPPORTED_STATS) { + params.put(stat, "0"); + } + } + + + public static Map extractStats( + Map partParameters) { + Map map = new HashMap<>(); + for (String key : TABLE_PARAMS_STATS_KEYS) { + String val = partParameters.get(key); + if (val == null) continue; + map.put(key, val); + } + return map; + } + +/** TODO## move this for a conversion script private static class ColumnStatsAccurate { private static ObjectReader objectReader; private static ObjectWriter objectWriter; @@ -205,132 +201,8 @@ public Boolean deserialize(JsonParser jsonParser, @JsonSerialize(contentUsing = BooleanSerializer.class) @JsonDeserialize(contentUsing = BooleanDeserializer.class) TreeMap columnStats = new TreeMap<>(); - - } - - public static boolean areBasicStatsUptoDate(Map params) { - if (params == null) { - return false; - } - ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - return stats.basicStats; - } - - public static boolean areColumnStatsUptoDate(Map params, String colName) { - if (params == null) { - return false; - } - ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - return stats.columnStats.containsKey(colName); - } - - // It will only throw JSONException when stats.put(BASIC_STATS, TRUE) - // has duplicate key, which is not possible - // note that set basic stats false will wipe out column stats too. - public static void setBasicStatsState(Map params, String setting) { - if (setting.equals(FALSE)) { - if (params!=null && params.containsKey(COLUMN_STATS_ACCURATE)) { - params.remove(COLUMN_STATS_ACCURATE); - } - return; - } - if (params == null) { - throw new RuntimeException("params are null...cant set columnstatstate!"); - } - ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - stats.basicStats = true; - try { - params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); - } catch (JsonProcessingException e) { - throw new RuntimeException("can't serialize column stats", e); - } - } - - public static void setColumnStatsState(Map params, List colNames) { - if (params == null) { - throw new RuntimeException("params are null...cant set columnstatstate!"); - } - if (colNames == null) { - return; - } - ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - - for (String colName : colNames) { - if (!stats.columnStats.containsKey(colName)) { - stats.columnStats.put(colName, true); - } - } - try { - params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); - } catch (JsonProcessingException e) { - LOG.trace(e.getMessage()); - } - } - - public static boolean canColumnStatsMerge(Map params, String colName) { - if (params == null) { - return false; - } - // TODO: should this also check that the basic flag is valid? 
- ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - return stats.columnStats.containsKey(colName); - } - - public static void clearColumnStatsState(Map params) { - if (params == null) { - return; - } - - ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - stats.columnStats.clear(); - - try { - params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); - } catch (JsonProcessingException e) { - LOG.trace(e.getMessage()); - } } - public static void removeColumnStatsState(Map params, List colNames) { - if (params == null) { - return; - } - try { - ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); - for (String string : colNames) { - stats.columnStats.remove(string); - } - params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); - } catch (JsonProcessingException e) { - LOG.trace(e.getMessage()); - } - } - - public static void setStatsStateForCreateTable(Map params, - List cols, String setting) { - if (TRUE.equals(setting)) { - for (String stat : StatsSetupConst.SUPPORTED_STATS) { - params.put(stat, "0"); - } - } - setBasicStatsState(params, setting); - if (TRUE.equals(setting)) { - setColumnStatsState(params, cols); - } - } - - private static ColumnStatsAccurate parseStatsAcc(String statsAcc) { - if (statsAcc == null) { - return new ColumnStatsAccurate(); - } - try { - return ColumnStatsAccurate.objectReader.readValue(statsAcc); - } catch (Exception e) { - ColumnStatsAccurate ret = new ColumnStatsAccurate(); - if (TRUE.equalsIgnoreCase(statsAcc)) { - ret.basicStats = true; - } - return ret; - } - } + ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); +*/ } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index 69f6ed570eba6a4e323e9570d2bba3a990542b51..0da313b75625077df1f6506d472dd4b24f8520e1 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -318,7 +318,8 @@ public void alterTable(RawStore msdb, Warehouse wh, String catName, String dbnam !isPartitionedTable) { Database db = msdb.getDatabase(catName, newDbName); // Update table stats. 
For partitioned table, we update stats in alterPartition() - MetaStoreUtils.updateTableStatsSlow(db, newt, wh, false, true, environmentContext); + MetaStoreUtils.updateTableFsStatsSlow( + db, newt, wh, false, true, environmentContext); } if (isPartitionedTable) { @@ -465,14 +466,11 @@ public Partition alterPartition(RawStore msdb, Warehouse wh, String catName, Str "Unable to alter partition because table or database does not exist."); } oldPart = msdb.getPartition(catName, dbname, name, new_part.getValues()); - if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) { + if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext) + && !MetaStoreUtils.isFastStatsSame(oldPart, new_part)) { // if stats are same, no need to update - if (MetaStoreUtils.isFastStatsSame(oldPart, new_part)) { - MetaStoreUtils.updateBasicState(environmentContext, new_part.getParameters()); - } else { - MetaStoreUtils.updatePartitionStatsFast( - new_part, tbl, wh, false, true, environmentContext, false); - } + MetaStoreUtils.updatePartitionStatsFast( + new_part, tbl, wh, false, true, environmentContext, false); } // PartitionView does not have SD. We do not need update its column stats @@ -709,14 +707,10 @@ public Partition alterPartition(RawStore msdb, Warehouse wh, String catName, Str oldParts.add(oldTmpPart); partValsList.add(tmpPart.getValues()); - if (MetaStoreUtils.requireCalStats(oldTmpPart, tmpPart, tbl, environmentContext)) { - // Check if stats are same, no need to update - if (MetaStoreUtils.isFastStatsSame(oldTmpPart, tmpPart)) { - MetaStoreUtils.updateBasicState(environmentContext, tmpPart.getParameters()); - } else { - MetaStoreUtils.updatePartitionStatsFast( - tmpPart, tbl, wh, false, true, environmentContext, false); - } + if (MetaStoreUtils.requireCalStats(oldTmpPart, tmpPart, tbl, environmentContext) + && !MetaStoreUtils.isFastStatsSame(oldTmpPart, tmpPart)) { + MetaStoreUtils.updatePartitionStatsFast( + tmpPart, tbl, wh, false, true, environmentContext, false); } // PartitionView does not have SD and we do not need to update its column stats @@ -848,7 +842,6 @@ void alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTa deletedCols.add(statsObj.getColName()); } } - StatsSetupConst.removeColumnStatsState(newTable.getParameters(), deletedCols); } } } @@ -923,7 +916,6 @@ private ColumnStatistics updateOrGetPartitionColumnStats( deletedCols.add(statsObj.getColName()); } } - StatsSetupConst.removeColumnStatsState(part.getParameters(), deletedCols); if (!newStatsObjs.isEmpty()) { partColStats.setStatsObj(newStatsObjs); newPartsColStats = partColStats; diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index a53d4be03d695bf2176436967026757391531bc9..787a926817e257caa28f5b62809ed9435531290a 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1862,7 +1862,7 @@ private void create_table_core(final RawStore ms, final Table tbl, } if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && !MetaStoreUtils.isView(tbl)) { - MetaStoreUtils.updateTableStatsSlow(db, tbl, wh, madeDir, false, envContext); + MetaStoreUtils.updateTableFsStatsSlow(db, tbl, wh, madeDir, false, envContext); } // set create time @@ 
-2680,88 +2680,45 @@ public void drop_table_with_environment_context(final String dbname, final Strin } - private void updateStatsForTruncate(Map props, EnvironmentContext environmentContext) { - if (null == props) { - return; - } - for (String stat : StatsSetupConst.SUPPORTED_STATS) { - String statVal = props.get(stat); - if (statVal != null) { - //In the case of truncate table, we set the stats to be 0. - props.put(stat, "0"); - } - } - //first set basic stats to true - StatsSetupConst.setBasicStatsState(props, StatsSetupConst.TRUE); - environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK); - //then invalidate column stats - StatsSetupConst.clearColumnStatsState(props); - return; - } - - private void alterPartitionForTruncate(RawStore ms, String catName, String dbName, String tableName, - Table table, Partition partition, String validWriteIds, long writeId) throws Exception { - EnvironmentContext environmentContext = new EnvironmentContext(); - updateStatsForTruncate(partition.getParameters(), environmentContext); - - if (!transactionalListeners.isEmpty()) { - MetaStoreListenerNotifier.notifyEvent(transactionalListeners, - EventType.ALTER_PARTITION, - new AlterPartitionEvent(partition, partition, table, true, true, this)); - } - - if (!listeners.isEmpty()) { - MetaStoreListenerNotifier.notifyEvent(listeners, - EventType.ALTER_PARTITION, - new AlterPartitionEvent(partition, partition, table, true, true, this)); - } - - if (writeId > 0) { - partition.setWriteId(writeId); - } - alterHandler.alterPartition(ms, wh, catName, dbName, tableName, null, partition, - environmentContext, this, validWriteIds); - } - private void alterTableStatsForTruncate(RawStore ms, String catName, String dbName, String tableName, Table table, List partNames, String validWriteIds, long writeId) throws Exception { - if (partNames == null) { - if (0 != table.getPartitionKeysSize()) { - for (Partition partition : ms.getPartitions(catName, dbName, tableName, Integer.MAX_VALUE)) { - alterPartitionForTruncate(ms, catName, dbName, tableName, table, partition, - validWriteIds, writeId); + ms.openTransaction(); + boolean isOk = false; + try { + getMS().invalidateAllColumnStatistics(catName, dbName, tableName, partNames, writeId); + if (partNames == null) { + if (0 != table.getPartitionKeysSize()) { + for (Partition partition : ms.getPartitions(catName, dbName, tableName, Integer.MAX_VALUE)) { + getMS().alterPartitionBasicStats(catName, dbName, tableName, partition.getValues(), + generateStatsForTruncate(), true, writeId, validWriteIds); + } + } else { + getMS().alterTableBasicStats(catName, dbName, tableName, + generateStatsForTruncate(), true, writeId, validWriteIds); } } else { - EnvironmentContext environmentContext = new EnvironmentContext(); - updateStatsForTruncate(table.getParameters(), environmentContext); - - if (!transactionalListeners.isEmpty()) { - MetaStoreListenerNotifier.notifyEvent(transactionalListeners, - EventType.ALTER_TABLE, - new AlterTableEvent(table, table, true, true, this)); + for (Partition partition : ms.getPartitionsByNames(catName, dbName, tableName, partNames)) { + getMS().alterPartitionBasicStats(catName, dbName, tableName, partition.getValues(), + generateStatsForTruncate(), true, writeId, validWriteIds); } - - if (!listeners.isEmpty()) { - MetaStoreListenerNotifier.notifyEvent(listeners, - EventType.ALTER_TABLE, - new AlterTableEvent(table, table, true, true, this)); - } - - // TODO: this should actually pass thru and set writeId for txn stats. 
- if (writeId > 0) { - table.setWriteId(writeId); - } - alterHandler.alterTable(ms, wh, catName, dbName, tableName, table, - environmentContext, this, validWriteIds); } - } else { - for (Partition partition : ms.getPartitionsByNames(catName, dbName, tableName, partNames)) { - alterPartitionForTruncate(ms, catName, dbName, tableName, table, partition, - validWriteIds, writeId); + isOk = true; + } finally { + if (isOk) { + ms.commitTransaction(); + } else { + ms.rollbackTransaction(); } } - return; + } + + private Map generateStatsForTruncate() { + Map zeroStats = new HashMap<>(); + for (String stat : StatsSetupConst.SUPPORTED_STATS) { + zeroStats.put(stat, "0"); + } + return zeroStats; } private List getLocationsForTruncate(final RawStore ms, @@ -5720,9 +5677,21 @@ public TableStatsResult get_table_statistics_req(TableStatsRequest request) thro // is currently only done on metastore size (see set_aggr...). // For some optimizations we might make use of incorrect stats that are "better than // nothing", so this may change in future. - result = new TableStatsResult((cs == null || cs.getStatsObj() == null - || (cs.isSetIsStatsCompliant() && !cs.isIsStatsCompliant())) - ? Lists.newArrayList() : cs.getStatsObj()); + // Also we don't return stats for a subset of the columns. That would have been easy, + // but the calling code might not expect this for now. + if (cs == null || cs.getStatsObj() == null) { + result = new TableStatsResult(Lists.newArrayList()); + } else { + for (ColumnStatisticsObj obj : cs.getStatsObj()) { + if (!obj.isSetIsStatsCompliant() || !obj.isIsStatsCompliant()) { + result = new TableStatsResult(Lists.newArrayList()); + break; + } + } + if (result == null) { + result = new TableStatsResult(cs.getStatsObj()); + } + } } finally { endFunction("get_table_statistics_req", result == null, null, tblName); } @@ -5792,7 +5761,16 @@ public PartitionsStatsResult get_partitions_statistics_req(PartitionsStatsReques // is currently only done on metastore size (see set_aggr...). // For some optimizations we might make use of incorrect stats that are "better than // nothing", so this may change in future. - if (stat.isSetIsStatsCompliant() && !stat.isIsStatsCompliant()) continue; + // Also we don't return stats for a subset of the columns. That would have been easy, + // but the calling code might not expect this for now. 
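
Note: the all-or-nothing rule above changes the caller-visible contract: the result now contains either every requested column's statistics or none. A sketch from the caller's side; the request setup is omitted and the client variable is illustrative:

    // Sketch: an empty result can mean "no stats stored" or "at least one
    // requested column was non-compliant"; callers cannot tell these apart.
    TableStatsResult res = client.get_table_statistics_req(request);
    if (res.getTableStats().isEmpty()) {
      // plan without column statistics
    }
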
+ boolean isSomeInaccurate = false; + for (ColumnStatisticsObj obj : stat.getStatsObj()) { + if (!obj.isSetIsStatsCompliant() || !obj.isIsStatsCompliant()) { + isSomeInaccurate = true; + break; + } + } + if (isSomeInaccurate) continue; map.put(stat.getStatsDesc().getPartName(), stat.getStatsObj()); } } @@ -7549,7 +7527,7 @@ public AggrStats get_aggr_stats_for(PartitionsStatsRequest request) throws TExce String dbName = request.getDbName().toLowerCase(); String tblName = request.getTblName().toLowerCase(); startFunction("get_aggr_stats_for", ": table=" + - TableName.getQualified(catName, dbName, tblName)); + TableName.getQualified(catName, dbName, tblName) + "; " + request.getPartNames()); List lowerCaseColNames = new ArrayList<>(request.getColNames().size()); for (String colName : request.getColNames()) { @@ -7566,7 +7544,7 @@ public AggrStats get_aggr_stats_for(PartitionsStatsRequest request) throws TExce lowerCasePartNames, lowerCaseColNames, request.getValidWriteIdList()); return aggrStats; } finally { - endFunction("get_aggr_stats_for", aggrStats == null, null, request.getTblName()); + endFunction("get_aggr_stats_for", aggrStats == null, null, request.getTblName()); } } @@ -7662,40 +7640,12 @@ private boolean updatePartColumnStatsWithMerge(String catName, String dbName, St for (Entry entry : newStatsMap.entrySet()) { ColumnStatistics csNew = entry.getValue(); ColumnStatistics csOld = oldStatsMap.get(entry.getKey()); - boolean isInvalidTxnStats = csOld != null - && csOld.isSetIsStatsCompliant() && !csOld.isIsStatsCompliant(); - Partition part = mapToPart.get(entry.getKey()); - if (isInvalidTxnStats) { - // No columns can be merged; a shortcut for getMergableCols. - csNew.setStatsObj(Lists.newArrayList()); - } else { - // we first use getParameters() to prune the stats - MetaStoreUtils.getMergableCols(csNew, part.getParameters()); - // we merge those that can be merged - if (csOld != null && csOld.getStatsObjSize() != 0 && !csNew.getStatsObj().isEmpty()) { - MetaStoreUtils.mergeColStats(csNew, csOld); - } - } - - if (!csNew.getStatsObj().isEmpty()) { - // We don't short-circuit on errors here anymore. That can leave acid stats invalid. - result = updatePartitonColStatsInternal(t, csNew, - request.getValidWriteIdList(), request.getWriteId()) && result; - } else if (isInvalidTxnStats) { - // For now because the stats state is such as it is, we will invalidate everything. - // Overall the sematics here are not clear - we could invalide only some columns, but does - // that make any physical sense? Could query affect some columns but not others? - part.setWriteId(request.getWriteId()); - StatsSetupConst.clearColumnStatsState(part.getParameters()); - StatsSetupConst.setBasicStatsState(part.getParameters(), StatsSetupConst.FALSE); - ms.alterPartition(catName, dbName, tableName, part.getValues(), part, - request.getValidWriteIdList()); - result = false; - } else { - // TODO: why doesn't the original call for non acid tables invalidate the stats? - LOG.debug("All the column stats " + csNew.getStatsDesc().getPartName() - + " are not accurate to merge."); - } + // Merge will propagate the valid flag from the old stats. + MetaStoreUtils.mergeColStats(csNew, csOld); + if (csNew.getStatsObj().isEmpty()) continue; + // We don't short-circuit on errors here anymore. That can leave acid stats invalid. 
+ result = updatePartitonColStatsInternal(t, csNew, + request.getValidWriteIdList(), request.getWriteId()) && result; } ms.commitTransaction(); isCommitted = true; @@ -7711,45 +7661,19 @@ private boolean updatePartColumnStatsWithMerge(String catName, String dbName, St private boolean updateTableColumnStatsWithMerge(String catName, String dbName, String tableName, List colNames, SetPartitionsStatsRequest request) throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { - ColumnStatistics firstColStats = request.getColStats().get(0); + ColumnStatistics csNew = request.getColStats().get(0); RawStore ms = getMS(); ms.openTransaction(); boolean isCommitted = false, result = false; try { ColumnStatistics csOld = ms.getTableColumnStatistics(catName, dbName, tableName, colNames, request.getValidWriteIdList()); - // we first use the valid stats list to prune the stats - boolean isInvalidTxnStats = csOld != null - && csOld.isSetIsStatsCompliant() && !csOld.isIsStatsCompliant(); - if (isInvalidTxnStats) { - // No columns can be merged; a shortcut for getMergableCols. - firstColStats.setStatsObj(Lists.newArrayList()); - } else { - Table t = getTable(catName, dbName, tableName); - MetaStoreUtils.getMergableCols(firstColStats, t.getParameters()); - - // we merge those that can be merged - if (csOld != null && csOld.getStatsObjSize() != 0 && !firstColStats.getStatsObj().isEmpty()) { - MetaStoreUtils.mergeColStats(firstColStats, csOld); - } - } - - if (!firstColStats.getStatsObj().isEmpty()) { - result = updateTableColumnStatsInternal(firstColStats, + // Merge will propagate the valid flag from the old stats. + MetaStoreUtils.mergeColStats(csNew, csOld); + if (!csNew.getStatsObj().isEmpty()) { + // We don't short-circuit on errors here anymore. That can leave acid stats invalid. + result = updateTableColumnStatsInternal(csNew, request.getValidWriteIdList(), request.getWriteId()); - } else if (isInvalidTxnStats) { - // For now because the stats state is such as it is, we will invalidate everything. - // Overall the sematics here are not clear - we could invalide only some columns, but does - // that make any physical sense? Could query affect some columns but not others? - Table t = getTable(catName, dbName, tableName); - t.setWriteId(request.getWriteId()); - StatsSetupConst.clearColumnStatsState(t.getParameters()); - StatsSetupConst.setBasicStatsState(t.getParameters(), StatsSetupConst.FALSE); - ms.alterTable(catName, dbName, tableName, t, request.getValidWriteIdList()); - } else { - // TODO: why doesn't the original call for non acid tables invalidate the stats? 
- LOG.debug("All the column stats are not accurate to merge."); - result = true; } ms.commitTransaction(); @@ -8967,6 +8891,73 @@ public void add_runtime_stats(RuntimeStat stat) throws TException { endFunction("get_runtime_stats", ex == null, ex); } } + + @Override + public SetBasicStatsResponse update_table_basic_statistics_req( + SetBasicStatsRequest req) throws TException { + startFunction("update_table_basic_statistics_req"); + Exception ex = null; + try { + ColumnStatisticsDesc d = req.getDesc(); + if (d.isSetPartName()) { + throw new InvalidInputException("Partition was specified for a table-level call"); + } + getMS().alterTableBasicStats(d.getCatName(), d.getDbName(), d.getTableName(), + req.getLegacyStats(), req.isSetIsValid() && req.isIsValid(), + req.getWriteId(), req.getValidWriteIdList()); + return new SetBasicStatsResponse(true); + } catch (MetaException e) { + LOG.error("Caught exception", e); + ex = e; + throw e; + } finally { + endFunction("update_table_basic_statistics_req", ex == null, ex); + } + } + + @Override + public SetBasicStatsResponse update_partition_basic_statistics_req( + SetBasicStatsRequest req) throws TException { + startFunction("update_partition_basic_statistics_req"); + Exception ex = null; + try { + ColumnStatisticsDesc d = req.getDesc(); + if (!d.isSetPartName()) { + throw new InvalidInputException("Partition was not specified"); + } + List partVals = Warehouse.getPartValuesFromPartName(d.getPartName()); + getMS().alterPartitionBasicStats(d.getCatName(), d.getDbName(), d.getTableName(), partVals, + req.getLegacyStats(), req.isSetIsValid() && req.isIsValid(), + req.getWriteId(), req.getValidWriteIdList()); + return new SetBasicStatsResponse(true); + } catch (MetaException e) { + LOG.error("Caught exception", e); + ex = e; + throw e; + } finally { + endFunction("update_partition_basic_statistics_req", ex == null, ex); + } + } + + @Override + public InvalidateColumnStatsResponse invalidate_all_column_statistics_req( + InvalidateColumnStatsRequest req) throws NoSuchObjectException, + InvalidObjectException, MetaException, InvalidInputException, TException { + startFunction("invalidate_all_column_statistics_req"); + Exception ex = null; + try { + getMS().invalidateAllColumnStatistics( + req.getCatName(), req.getDbName(), req.getTableName(), + Lists.newArrayList(req.getPartName()), req.getWriteId()); + return new InvalidateColumnStatsResponse(true); + } catch (MetaException e) { + LOG.error("Caught exception", e); + ex = e; + throw e; + } finally { + endFunction("invalidate_all_column_statistics_req", ex == null, ex); + } + } } private static IHMSHandler newRetryingHMSHandler(IHMSHandler baseHandler, Configuration conf) diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 91c86a749c7afb06737c850e57f60820710c51f5..87b24623c5ad6f10ba9e7d94a4736a7525d73582 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -433,6 +433,32 @@ public void alter_table(String catName, String dbName, String tbl_name, Table ne client.alter_table_req(req); } + @Override + public void alterTableBasicStats(String catName, String dbName, String tblName, boolean isValid, + Map basicStats, long writeId, String validWriteIds) 
throws TException { + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tblName); + desc.setCatName(catName); + SetBasicStatsRequest req = new SetBasicStatsRequest(desc, isValid); + req.setValidWriteIdList(validWriteIds); + req.setWriteId(writeId); + req.setLegacyStats(basicStats); + client.update_table_basic_statistics_req(req); + } + + @Override + public void alterPartitionBasicStats(String catName, String dbName, String tblName, + String partName, boolean isValid, Map basicStats, + long writeId, String validWriteIds) throws TException { + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tblName); + desc.setCatName(catName); + desc.setPartName(partName); + SetBasicStatsRequest req = new SetBasicStatsRequest(desc, isValid); + req.setValidWriteIdList(validWriteIds); + req.setWriteId(writeId); + req.setLegacyStats(basicStats); + client.update_partition_basic_statistics_req(req); + } + @Deprecated @Override public void renamePartition(final String dbname, final String tableName, final List part_vals, @@ -3575,4 +3601,16 @@ public void addRuntimeStat(RuntimeStat stat) throws TException { req.setMaxCreateTime(maxCreateTime); return client.get_runtime_stats(req); } + + @Override + public void invalidateAllColumnStats( + String catName, String dbName, String tableName, String partName, long writeId) throws TException { + InvalidateColumnStatsRequest req = new InvalidateColumnStatsRequest(dbName, tableName); + req.setCatName(catName); + req.setWriteId(writeId); + if (partName != null) { + req.setPartName(partName); + } + client.invalidate_all_column_statistics_req(req); + } } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index 91405b9a334a4b031a5dc7f4a1757a3895bfb386..07eada7c9f820d7766c01546bc710b0b72f4005f 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -3748,4 +3748,15 @@ void createOrDropTriggerToPoolMapping(String resourcePlanName, String triggerNam /** Reads runtime statistics. 
*/ List getRuntimeStats(int maxWeight, int maxCreateTime) throws TException; + + void alterTableBasicStats(String catName, String dbName, String tblName, + boolean isValid, Map basicStats, long writeId, + String validWriteIds) throws TException; + + void alterPartitionBasicStats(String catName, String dbName, String tblName, + String partName, boolean isValid, Map basicStats, + long writeId, String validWriteIds) throws TException; + + void invalidateAllColumnStats(String catName, String dbName, + String tableName, String partName, long writeId) throws TException; } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index 95d9fe21bda9e880a4d4dd4af0ad4832a5a61643..b3e09e4e47aa6cfd33f859e477dc23375bccdbc6 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -1430,7 +1430,8 @@ public ColumnStatistics getTableStats(final String catName, final String dbName, public AggrStats aggrColStatsForPartitions(String catName, String dbName, String tableName, List partNames, List colNames, boolean useDensityFunctionForNDVEstimation, - double ndvTuner, boolean enableBitVector) throws MetaException { + double ndvTuner, boolean enableBitVector, String writeIdList) throws MetaException { + // TODO# use writeIdList - pass to aggregators if (colNames.isEmpty() || partNames.isEmpty()) { LOG.debug("Columns is empty or partNames is empty : Short-circuiting stats eval"); return new AggrStats(Collections.emptyList(), 0); // Nothing to aggregate @@ -1452,6 +1453,7 @@ public AggrStats aggrColStatsForPartitions(String catName, String dbName, String // Check the cache first colStatsAggrCached = aggrStatsCache.get(catName, dbName, tableName, colName, partNames); if (colStatsAggrCached != null) { + // TODO# this used to not check txn stats because the check was on partition level, needs to check now colStatsList.add(colStatsAggrCached.getColStats()); partsFound = colStatsAggrCached.getNumPartsCached(); } else { @@ -2770,11 +2772,12 @@ private void dropDanglingColumnDescriptors(List columnDescriptorIdList) return result; } - public Map> getColAndPartNamesWithStats( - String catName, String dbName, String tableName) throws MetaException { - // Could we also join with ACID tables to only get tables with outdated stats? + public Map> getColAndPartNamesWithAccurateStats( + String catName, String dbName, String tableName, String validWriteIds, + boolean isAccurateTarget) throws MetaException { String queryText = "SELECT DISTINCT " + PARTITIONS + ".\"PART_NAME\", " + PART_COL_STATS - + ".\"COLUMN_NAME\" FROM " + TBLS + " INNER JOIN " + DBS + " ON " + TBLS + ".\"DB_ID\" = " + + ".\"COLUMN_NAME\", " + PART_COL_STATS + ".\"WRITE_ID\", " + PART_COL_STATS + + ".\"STATS_ACCURATE\" FROM " + TBLS + " INNER JOIN " + DBS + " ON " + TBLS + ".\"DB_ID\" = " + DBS + ".\"DB_ID\" INNER JOIN " + PARTITIONS + " ON " + TBLS + ".\"TBL_ID\" = " + PARTITIONS + ".\"TBL_ID\" INNER JOIN " + PART_COL_STATS + " ON " + PARTITIONS + ".\"PART_ID\" = " + PART_COL_STATS + ".\"PART_ID\" WHERE " + DBS + ".\"NAME\" = ? 
AND " @@ -2790,6 +2793,14 @@ private void dropDanglingColumnDescriptors(List columnDescriptorIdList) String lastPartName = null; List cols = null; for (Object[] line : sqlResult) { + boolean isAccurate = extractSqlBoolean(line[3]); + long writeId = 0; + if (validWriteIds != null) { + writeId = extractSqlLong(line[2]); + isAccurate = ObjectStore.isCurrentStatsValidForTheQuery( + conf, isAccurate, writeId, validWriteIds, false); + } + if (isAccurate != isAccurateTarget) continue; String col = extractSqlString(line[1]); String part = extractSqlString(line[0]); if (!part.equals(lastPartName)) { diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 8af164efc92ced841d1b821a1856a2c73c4378ad..7e6deb683b9c26f1e35866bd5e82d8627b39d88e 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -54,6 +54,7 @@ import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.regex.Pattern; +import java.util.stream.Collectors; import javax.jdo.JDOCanRetryException; import javax.jdo.JDODataStoreException; @@ -1201,9 +1202,11 @@ public void createTable(Table tbl) throws InvalidObjectException, MetaException openTransaction(); mtbl = convertToMTable(tbl); + mtbl.setAreStatsAccurate(tbl.isSetIsStatsCompliant() && tbl.isIsStatsCompliant()); if (TxnUtils.isTransactionalTable(tbl)) { mtbl.setWriteId(tbl.getWriteId()); } + pm.makePersistent(mtbl); if (tbl.getCreationMetadata() != null) { @@ -1433,20 +1436,13 @@ public Table getTable(String catName, String dbName, String tableName, // check whether the current version table statistics // in the metastore comply with the client query's snapshot isolation. // Note: a partitioned table has table stats and table snapshot in MPartiiton. - if (writeIdList != null) { - boolean isTxn = tbl != null && TxnUtils.isTransactionalTable(tbl); - if (isTxn && !areTxnStatsSupported) { - StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from Table's parameters."); - } else if (isTxn && tbl.getPartitionKeysSize() == 0) { - if (isCurrentStatsValidForTheQuery(mtable, writeIdList, false)) { - tbl.setIsStatsCompliant(true); - } else { - tbl.setIsStatsCompliant(false); - // Do not make persistent the following state since it is the query specific (not global). 
-          StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE);
-          LOG.info("Removed COLUMN_STATS_ACCURATE from Table's parameters.");
-        }
+    boolean isTxn = tbl != null && TxnUtils.isTransactionalTable(tbl);
+    if (tbl != null) {
+      tbl.setIsStatsCompliant(true);
+    }
+    if (isTxn) {
+      if (!areTxnStatsSupported || writeIdList == null) {
+        tbl.setIsStatsCompliant(false);
+      } else if (tbl.getPartitionKeysSize() == 0) {
+        tbl.setIsStatsCompliant(isCurrentStatsValidForTheQuery(mtable, writeIdList, false));
       }
     }
     commited = commitTransaction();
@@ -1495,14 +1491,16 @@ public Table getTable(String catName, String dbName, String tableName,
   }

   @Override
-  public Map<String, List<String>> getPartitionColsWithStats(String catName, String dbName, String tableName)
+  public Map<String, List<String>> getPartitionColsWithAccurateStats(
+      String catName, String dbName, String tableName, String validWriteIds, boolean isAccurate)
       throws MetaException, NoSuchObjectException {
     return new GetHelper<Map<String, List<String>>>(catName, dbName, null, true, false) {
       @Override
       protected Map<String, List<String>> getSqlResult(
           GetHelper<Map<String, List<String>>> ctx) throws MetaException {
         try {
-          return directSql.getColAndPartNamesWithStats(catName, dbName, tableName);
+          return directSql.getColAndPartNamesWithAccurateStats(
+              catName, dbName, tableName, validWriteIds, isAccurate);
         } catch (Throwable ex) {
           LOG.error("DirectSQL failed", ex);
           throw new MetaException(ex.getMessage());
@@ -1942,6 +1940,7 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException,
     MetaException {
     // NOTE: we don't set writeId in this method. Write ID is only set after validating the
     // existing write ID against the caller's valid list.
+    // Similarly we don't set isStatsCompliant.
     if (tbl == null) {
       return null;
     }
@@ -2259,6 +2258,7 @@ public boolean addPartitions(String catName, String dbName, String tblName, List<Partition> parts,
             + dbName + "." + tblName + ": " + part);
       }
       MPartition mpart = convertToMPart(part, table, true);
+      mpart.setAreStatsAccurate(part.isSetIsStatsCompliant() && part.isIsStatsCompliant());
       toPersist.add(mpart);
       int now = (int)(System.currentTimeMillis()/1000);
@@ -2336,6 +2336,7 @@ public boolean addPartitions(String catName, String dbName, String tblName,
           if (isValidPartition(part, partitionKeys, ifNotExists)) {
             MPartition mpart = convertToMPart(part, table, true);
+            mpart.setAreStatsAccurate(part.isSetIsStatsCompliant() && part.isIsStatsCompliant());
             pm.makePersistent(mpart);
             if (tabGrants != null) {
               for (MTablePrivilege tab : tabGrants) {
@@ -2384,6 +2385,7 @@ public boolean addPartition(Partition part) throws InvalidObjectException,
           catName, part.getDbName(), part.getTableName());
     }
     MPartition mpart = convertToMPart(part, table, true);
+    mpart.setAreStatsAccurate(part.isSetIsStatsCompliant() && part.isIsStatsCompliant());
     pm.makePersistent(mpart);
     int now = (int)(System.currentTimeMillis()/1000);
@@ -2446,20 +2448,14 @@ public Partition getPartition(String catName, String dbName, String tableName,
     // If transactional table partition, check whether the current version partition
     // statistics in the metastore comply with the client query's snapshot isolation.
     long statsWriteId = mpart.getWriteId();
+    part.setIsStatsCompliant(true);
     if (TxnUtils.isTransactionalTable(table.getParameters())) {
-      if (!areTxnStatsSupported) {
+      if (!areTxnStatsSupported || validWriteIds == null) {
         // Do not make persistent the following state since it is query specific (not global).
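The table read path above, and the partition read path that continues below, now share one decision shape. A compact restatement, with the snapshot check injected as a callback standing in for isCurrentStatsValidForTheQuery:

```java
import java.util.function.BooleanSupplier;

// Non-transactional stats are always compliant; transactional stats need
// txn-stats support plus a write-ID list; a partitioned table's table-level
// flag stays up and the per-partition checks decide instead.
public class TableStatsCompliance {
  static boolean isCompliant(boolean isTxn, boolean areTxnStatsSupported,
      String writeIdList, int partitionKeyCount, BooleanSupplier snapshotCheck) {
    if (!isTxn) {
      return true;                       // non-ACID stats are always usable
    }
    if (!areTxnStatsSupported || writeIdList == null) {
      return false;                      // no way to verify the snapshot
    }
    if (partitionKeyCount > 0) {
      return true;                       // partitions carry their own verdicts
    }
    return snapshotCheck.getAsBoolean(); // unpartitioned: verify write ID
  }
}
```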
-          StatsSetupConst.setBasicStatsState(part.getParameters(), StatsSetupConst.FALSE);
-          LOG.info("Removed COLUMN_STATS_ACCURATE from Partition object's parameters.");
+          part.setIsStatsCompliant(false);
+          LOG.info("Marked partition stats as non-compliant for this query.");
-      } else if (validWriteIds != null) {
-        if (isCurrentStatsValidForTheQuery(part, statsWriteId, validWriteIds, false)) {
-          part.setIsStatsCompliant(true);
-        } else {
-          part.setIsStatsCompliant(false);
-          // Do not make persistent the following state since it is query specific (not global).
-          StatsSetupConst.setBasicStatsState(part.getParameters(), StatsSetupConst.FALSE);
-          LOG.info("Removed COLUMN_STATS_ACCURATE from Partition object's parameters.");
+      } else {
+        part.setIsStatsCompliant(isCurrentStatsValidForTheQuery(part, statsWriteId, validWriteIds, false));
       }
     }
     return part;
@@ -2570,6 +2566,7 @@ private MPartition convertToMPart(Partition part, MTable mt, boolean useTableCD)
       throws InvalidObjectException, MetaException {
     // NOTE: we don't set writeId in this method. Write ID is only set after validating the
     // existing write ID against the caller's valid list.
+    // Similarly we don't set isStatsCompliant.
     if (part == null) {
       return null;
     }
@@ -4112,16 +4109,12 @@ public Table alterTable(String catName, String dbname, String name, Table newTab
     // For now only alter name, owner, parameters, cols, bucketcols are allowed
     oldt.setDatabase(newt.getDatabase());
     oldt.setTableName(normalizeIdentifier(newt.getTableName()));
-    boolean isTxn = TxnUtils.isTransactionalTable(newTable);
-    if (isTxn && areTxnStatsSupported) {
-      // Transactional table is altered without a txn. Make sure there are no changes to the flag.
-      String errorMsg = verifyStatsChangeCtx(oldt.getParameters(), newTable.getParameters(),
-          newTable.getWriteId(), queryValidWriteIds, false);
-      if (errorMsg != null) {
-        throw new MetaException(errorMsg);
-      }
+    if ((newTable.isSetWriteId() && oldt.getWriteId() != newTable.getWriteId())
+        || (newTable.isSetIsStatsCompliant() && oldt.areStatsAccurate() != newTable.isIsStatsCompliant())
+        || hasStatsParams(oldt.getParameters(), newTable.getParameters())) {
+      throw new MetaException("Stats cannot be modified by alterTable call");
     }
-    boolean isToTxn = isTxn && !TxnUtils.isTransactionalTable(oldt.getParameters());
+
     oldt.setParameters(newt.getParameters());
     oldt.setOwner(newt.getOwner());
     oldt.setOwnerType(newt.getOwnerType());
@@ -4142,26 +4135,58 @@ public Table alterTable(String catName, String dbname, String name, Table newTab
     oldt.setViewExpandedText(newt.getViewExpandedText());
     oldt.setRewriteEnabled(newt.isRewriteEnabled());
-    // If transactional, update the stats state for the current Stats updater query.
-    // Don't update for conversion to acid - it doesn't modify stats but passes in qVWIds.
-    // The fact that it doesn't update stats is verified above.
+    newTable = convertToTable(oldt);
+
+    // commit the changes
+    success = commitTransaction();
+  } finally {
+    if (!success) {
+      rollbackTransaction();
+    }
+  }
+  return newTable;
+ }
+
+  private static boolean hasStatsParams(Map<String, String> op, Map<String, String> np) {
+    if (hasParamSet(op, np, StatsSetupConst.COLUMN_STATS_ACCURATE_DEPRECATED)) {
+      return true;
+    }
+    for (String k : StatsSetupConst.SUPPORTED_STATS) {
+      if (hasParamSet(op, np, k)) return true;
+    }
+    return false;
+  }
+
+  private static boolean hasParamSet(
+      Map<String, String> op, Map<String, String> np, String k) {
+    String ov = op.get(k), nv = np.get(k);
+    return (nv != null && !nv.equals(ov));
+  }
+
+  @Override
+  public Table alterTableBasicStats(String catName, String dbname, String name,
+      Map<String, String> stats, boolean isAccurate, long writeId, String validWriteIds) throws MetaException {
+    Table newTable;
+    boolean success = false;
+    try {
+      openTransaction();
+      MTable oldt = ensureGetTableNormalized(catName, dbname, name);
+
+      boolean isTxn = TxnUtils.isTransactionalTable(oldt.getParameters());
     if (isTxn) {
       if (!areTxnStatsSupported) {
-        StatsSetupConst.setBasicStatsState(oldt.getParameters(), StatsSetupConst.FALSE);
-      } else if (queryValidWriteIds != null && (!isToTxn || newTable.getWriteId() > 0)) {
-        // Check concurrent INSERT case and set false to the flag.
-        if (!isCurrentStatsValidForTheQuery(oldt, queryValidWriteIds, true)) {
-          StatsSetupConst.setBasicStatsState(oldt.getParameters(), StatsSetupConst.FALSE);
-          LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the table " +
-              dbname + "." + name + ". will be made persistent.");
-        }
-        assert newTable.getWriteId() > 0;
-        oldt.setWriteId(newTable.getWriteId());
+        isAccurate = false;
+      } else if (writeId == 0 || validWriteIds == null) {
+        throw new MetaException(generateTxnStatsError(writeId, validWriteIds));
+      } else if (!isCurrentStatsValidForTheQuery(oldt, validWriteIds, true)) {
+        isAccurate = false;
       }
+      oldt.setWriteId(writeId);
     }
-    newTable = convertToTable(oldt);
+    oldt.setAreStatsAccurate(isAccurate);
+    oldt.getParameters().putAll(stats);

-    // commit the changes
+    newTable = convertToTable(oldt);
     success = commitTransaction();
   } finally {
     if (!success) {
@@ -4171,30 +4196,65 @@ public Table alterTable(String catName, String dbname, String name, Table newTab
     return newTable;
   }

-  /**
-   * Verifies that the stats JSON string is unchanged for alter table (txn stats).
-   * @return Error message with the details of the change, or null if the value has not changed.
-   */
-  private static String verifyStatsChangeCtx(Map<String, String> oldP, Map<String, String> newP,
-      long writeId, String validWriteIds, boolean isColStatsChange) {
-    if (validWriteIds != null && writeId > 0) return null; // We have txn context.
-    String oldVal = oldP == null ? null : oldP.get(StatsSetupConst.COLUMN_STATS_ACCURATE);
-    String newVal = newP == null ? null : newP.get(StatsSetupConst.COLUMN_STATS_ACCURATE);
-    // We don't need txn context is that stats state is not being changed.
-    if (StringUtils.isEmpty(oldVal) && StringUtils.isEmpty(newVal)) return null;
-    if (StringUtils.equalsIgnoreCase(oldVal, newVal)) {
-      if (!isColStatsChange) return null; // No change in col stats or parameters => assume no change.
-      // Col stats change while json stays "valid" implies stats change. If the new value is invalid,
-      // then we don't care. This is super ugly and idiotic.
-      // It will all become better when we get rid of JSON and store a flag and write ID per stats.
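The hasStatsParams/hasParamSet guard above rejects alter calls that try to smuggle in stats changes. A self-contained rendering, with the StatsSetupConst.SUPPORTED_STATS keys inlined (assumed here to be the usual four basic stats):

```java
import java.util.HashMap;
import java.util.Map;

public class StatsParamGuard {
  // Inlined stand-in for StatsSetupConst.SUPPORTED_STATS.
  private static final String[] SUPPORTED_STATS =
      {"numFiles", "numRows", "totalSize", "rawDataSize"};

  // An alter call may drop or keep a stat parameter, but setting a value
  // that differs from the stored one counts as a stats change.
  static boolean hasStatsParams(Map<String, String> op, Map<String, String> np) {
    for (String k : SUPPORTED_STATS) {
      String ov = op.get(k), nv = np.get(k);
      if (nv != null && !nv.equals(ov)) {
        return true; // a stat value is being introduced or modified
      }
    }
    return false;
  }

  public static void main(String[] args) {
    Map<String, String> stored = new HashMap<>();
    stored.put("numRows", "100");
    Map<String, String> altered = new HashMap<>(stored);
    altered.put("numRows", "200");
    System.out.println(hasStatsParams(stored, stored));  // false: unchanged
    System.out.println(hasStatsParams(stored, altered)); // true: would be rejected
  }
}
```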
- if (!StatsSetupConst.areBasicStatsUptoDate(newP)) return null; - } - // Some change to the stats state is being made; it can only be made with a write ID. - // Note - we could do this: if (writeId > 0 && (validWriteIds != null || !StatsSetupConst.areBasicStatsUptoDate(newP))) { return null; - // However the only way ID list can be absent is if WriteEntity wasn't generated for the alter, which is a separate bug. + private MTable ensureGetTableNormalized(String catName, String dbname, + String name) throws MetaException { + name = normalizeIdentifier(name); + dbname = normalizeIdentifier(dbname); + catName = normalizeIdentifier(catName); + MTable oldt = getMTable(catName, dbname, name); + if (oldt == null) { + throw new MetaException("table " + dbname + "." + name + " doesn't exist"); + } + return oldt; + } + + + @Override + public Partition alterPartitionBasicStats(String catName, String dbname, String name, List partVals, + Map stats, boolean isAccurate, long writeId, String validWriteIds) throws MetaException { + Partition newPart; + boolean success = false; + try { + openTransaction(); + name = normalizeIdentifier(name); + dbname = normalizeIdentifier(dbname); + catName = normalizeIdentifier(catName); + MTable table = getMTable(catName, dbname, name); + if (table == null) { + throw new MetaException("table " + dbname + "." + name + " doesn't exist"); + } + MPartition oldp = getMPartition(catName, dbname, name, partVals); + if (oldp == null) { + throw new MetaException("Partition " + dbname + "." + name + " " + partVals + " does not exist"); + } + boolean isTxn = TxnUtils.isTransactionalTable(table.getParameters()); + if (isTxn) { + if (!areTxnStatsSupported) { + isAccurate = false; + } else if (writeId == 0 || validWriteIds == null) { + throw new MetaException(generateTxnStatsError(writeId, validWriteIds)); + } else if (!isCurrentStatsValidForTheQuery(oldp, validWriteIds, true)) { + isAccurate = false; + } + oldp.setWriteId(writeId); + } + oldp.setAreStatsAccurate(isAccurate); + oldp.getParameters().putAll(stats); + + newPart = convertToPart(oldp); + success = commitTransaction(); + } finally { + if (!success) { + rollbackTransaction(); + } + } + return newPart; + } + + private static String generateTxnStatsError(long writeId, String validWriteIds) { return "Cannot change stats state for a transactional table without providing the transactional" + " write state for verification (new write ID " + writeId + ", valid write IDs " - + validWriteIds + "; current state " + oldVal + "; new state " + newVal; + + validWriteIds; } @Override @@ -4253,14 +4313,10 @@ private Partition alterPartitionNoTxn(String catName, String dbname, String name } oldp.setValues(newp.getValues()); oldp.setPartitionName(newp.getPartitionName()); - boolean isTxn = TxnUtils.isTransactionalTable(table.getParameters()); - if (isTxn && areTxnStatsSupported) { - // Transactional table is altered without a txn. Make sure there are no changes to the flag. 
- String errorMsg = verifyStatsChangeCtx(oldp.getParameters(), newPart.getParameters(), - newPart.getWriteId(), validWriteIds, false); - if (errorMsg != null) { - throw new MetaException(errorMsg); - } + if ((newPart.isSetWriteId() && oldp.getWriteId() != newPart.getWriteId()) + || (newPart.isSetIsStatsCompliant() && oldp.areStatsAccurate() != newPart.isIsStatsCompliant()) + || hasStatsParams(oldp.getParameters(), newPart.getParameters())) { + throw new MetaException("Stats cannot be modified by alterPartition call"); } oldp.setParameters(newPart.getParameters()); if (!TableType.VIRTUAL_VIEW.name().equals(oldp.getTable().getTableType())) { @@ -4273,22 +4329,6 @@ private Partition alterPartitionNoTxn(String catName, String dbname, String name oldp.setLastAccessTime(newp.getLastAccessTime()); } - // If transactional, add/update the MUPdaterTransaction - // for the current updater query. - if (isTxn) { - if (!areTxnStatsSupported) { - StatsSetupConst.setBasicStatsState(oldp.getParameters(), StatsSetupConst.FALSE); - } else if (validWriteIds != null && newPart.getWriteId() > 0) { - // Check concurrent INSERT case and set false to the flag. - if (!isCurrentStatsValidForTheQuery(oldp, validWriteIds, true)) { - StatsSetupConst.setBasicStatsState(oldp.getParameters(), StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the partition " + - dbname + "." + name + "." + oldp.getPartitionName() + " will be made persistent."); - } - oldp.setWriteId(newPart.getWriteId()); - } - } - oldCd.t = oldCD; return convertToPart(oldp); } @@ -8434,53 +8474,45 @@ private void writeMPartitionColumnStatistics(Table table, Partition partition, // So let's not use them anywhere unless absolutely necessary. String catName = statsDesc.isSetCatName() ? statsDesc.getCatName() : getDefaultCatalog(conf); Table table = ensureGetTable(catName, statsDesc.getDbName(), statsDesc.getTableName()); + Boolean isAccurate = null; // null means check each stat individually. + boolean isTxn = TxnUtils.isTransactionalTable(table.getParameters()); + if (!isTxn) { + isAccurate = true; + } else if (!areTxnStatsSupported) { + isAccurate = false; + } else if (writeId == 0 || validWriteIds == null) { + throw new MetaException(generateTxnStatsError(writeId, validWriteIds)); + } + List colNames = new ArrayList<>(); for (ColumnStatisticsObj statsObj : statsObjs) { colNames.add(statsObj.getColName()); } Map oldStats = getPartitionColStats(table, colNames); - for (ColumnStatisticsObj statsObj:statsObjs) { // We have to get mtable again because DataNucleus. MTableColumnStatistics mStatsObj = StatObjectConverter.convertToMTableColumnStatistics( ensureGetMTable(catName, statsDesc.getDbName(), statsDesc.getTableName()), statsDesc, statsObj); - writeMTableColumnStatistics(table, mStatsObj, oldStats.get(statsObj.getColName())); - // There is no need to add colname again, otherwise we will get duplicate colNames. - } - // TODO: (HIVE-20109) ideally the col stats stats should be in colstats, not in the table! - // Set the table properties - // No need to check again if it exists. 
- String dbname = table.getDbName(); - String name = table.getTableName(); - MTable oldt = getMTable(catName, dbname, name); - Map newParams = new HashMap<>(table.getParameters()); - StatsSetupConst.setColumnStatsState(newParams, colNames); - boolean isTxn = TxnUtils.isTransactionalTable(oldt.getParameters()); - if (isTxn) { - if (!areTxnStatsSupported) { - StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE); - } else { - String errorMsg = verifyStatsChangeCtx( - oldt.getParameters(), newParams, writeId, validWriteIds, true); - if (errorMsg != null) { - throw new MetaException(errorMsg); - } - if (!isCurrentStatsValidForTheQuery(oldt, validWriteIds, true)) { - // Make sure we set the flag to invalid regardless of the current value. - StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the table " - + dbname + "." + name); - } - oldt.setWriteId(writeId); + // The stats are accurate if all of the following are true: + // 1) The caller thinks they are accurate. + // 2) There's no global flag preventing them from being accurate. + // 3) The stats txn state is valid. + boolean isCallerColAccurate = statsObj.isSetIsStatsCompliant() && statsObj.isIsStatsCompliant(); + boolean isColAccurate = isCallerColAccurate && ((isAccurate != null) ? isAccurate + : isCurrentStatsValidForTheQuery(conf, mStatsObj.areStatsAccurate(), + mStatsObj.getWriteId(), validWriteIds, true)); + mStatsObj.setAreStatsAccurate(isColAccurate); + if (isTxn) { + mStatsObj.setWriteId(writeId); } + writeMTableColumnStatistics(table, mStatsObj, oldStats.get(statsObj.getColName())); + // There is no need to add colname again, otherwise we will get duplicate colNames. } - oldt.setParameters(newParams); committed = commitTransaction(); - // TODO: similar to update...Part, this used to do "return committed;"; makes little sense. - return committed ? newParams : null; + return null; } finally { if (!committed) { rollbackTransaction(); @@ -8529,6 +8561,15 @@ private void writeMPartitionColumnStatistics(Table table, Partition partition, Partition partition = convertToPart(getMPartition( catName, statsDesc.getDbName(), statsDesc.getTableName(), partVals)); List colNames = new ArrayList<>(); + Boolean isAccurate = null; // null means check each stat individually. + boolean isTxn = TxnUtils.isTransactionalTable(table.getParameters()); + if (!isTxn) { + isAccurate = true; + } else if (!areTxnStatsSupported) { + isAccurate = false; + } else if (writeId == 0 || validWriteIds == null) { + throw new MetaException(generateTxnStatsError(writeId, validWriteIds)); + } for(ColumnStatisticsObj statsObj : statsObjs) { colNames.add(statsObj.getColName()); @@ -8546,36 +8587,74 @@ private void writeMPartitionColumnStatistics(Table table, Partition partition, for (ColumnStatisticsObj statsObj : statsObjs) { MPartitionColumnStatistics mStatsObj = StatObjectConverter.convertToMPartitionColumnStatistics(mPartition, statsDesc, statsObj); + // The stats are accurate if all of the following are true: + // 1) The caller thinks they are accurate. + // 2) There's no global flag preventing them from being accurate. + // 3) The stats txn state is valid. + boolean isCallerColAccurate = statsObj.isSetIsStatsCompliant() && statsObj.isIsStatsCompliant(); + boolean isColAccurate = isCallerColAccurate && ((isAccurate != null) ? 
isAccurate + : isCurrentStatsValidForTheQuery(conf, mStatsObj.areStatsAccurate(), mStatsObj.getWriteId(), + validWriteIds, true)); + mStatsObj.setAreStatsAccurate(isColAccurate); + if (isTxn) { + mStatsObj.setWriteId(writeId); + } writeMPartitionColumnStatistics(table, partition, mStatsObj, oldStats.get(statsObj.getColName())); } - // TODO: (HIVE-20109) the col stats stats should be in colstats, not in the partition! - Map newParams = new HashMap<>(mPartition.getParameters()); - StatsSetupConst.setColumnStatsState(newParams, colNames); - boolean isTxn = TxnUtils.isTransactionalTable(table); - if (isTxn) { - if (!areTxnStatsSupported) { - StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE); + + committed = commitTransaction(); + // TODO# change return type based on the cache change + return null; + } finally { + if (!committed) { + rollbackTransaction(); + } + } + } + + + @Override + public Map invalidateAllColumnStatistics(String catName, String dbName, + String tblName, List partNames, long writeId) + throws MetaException, NoSuchObjectException { + boolean committed = false; + + try { + openTransaction(); + MTable table = ensureGetTableNormalized(catName, dbName, tblName); + if (writeId <= 0 && TxnUtils.isTransactionalTable(table.getParameters())) { + throw new MetaException("writeId was not specified when invalidating transactional stats"); + } + Table t = convertToTable(table); + List allCols = t.getSd().getCols().stream().map(c -> c.getName()).collect( + Collectors.toList()); + QueryWrapper qw = new QueryWrapper(); + try { + if (table.getPartitionKeys().isEmpty()) { + List allStats = getMTableColumnStatistics(t, allCols, qw); + for (MTableColumnStatistics stat : allStats) { + stat.setWriteId(writeId); + stat.setAreStatsAccurate(false); + } } else { - String errorMsg = verifyStatsChangeCtx( - mPartition.getParameters(), newParams, writeId, validWriteIds, true); - if (errorMsg != null) { - throw new MetaException(errorMsg); + if (partNames == null) { + partNames = getPartitionNamesNoTxn(catName, dbName, tblName, (short)-1); } - if (!isCurrentStatsValidForTheQuery(mPartition, validWriteIds, true)) { - // Make sure we set the flag to invalid regardless of the current value. - StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the partition " - + statsDesc.getDbName() + "." + statsDesc.getTableName() + "." + statsDesc.getPartName()); + List allStats = getMPartitionColumnStatistics( + t, partNames, allCols, qw); + for (MPartitionColumnStatistics stat : allStats) { + stat.setWriteId(writeId); + stat.setAreStatsAccurate(false); } - mPartition.setWriteId(writeId); } + } finally { + qw.close(); } - mPartition.setParameters(newParams); committed = commitTransaction(); - // TODO: what is the "return committed;" about? would it ever return false without throwing? - return committed ? newParams : null; + // TODO# change return type based on the cache change + return null; } finally { if (!committed) { rollbackTransaction(); @@ -8677,16 +8756,16 @@ public ColumnStatistics getTableColumnStatistics( String writeIdList) throws MetaException, NoSuchObjectException { // If the current stats in the metastore doesn't comply with // the isolation level of the query, set No to the compliance flag. 
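Both column-stats update paths above (table-level and partition-level) apply the same per-column accuracy rule. Distilled into one function, with the write-ID snapshot check injected as a predicate standing in for isCurrentStatsValidForTheQuery:

```java
import java.util.function.LongPredicate;

// A column stat ends up accurate only if the caller marked it accurate AND
// either a table-level verdict was already reached (non-txn => true,
// txn stats disabled => false) or the per-column write ID passes the
// snapshot check. tableVerdict == null means "check each column".
public class ColumnAccuracyRule {
  static boolean isColumnAccurate(boolean callerSaysAccurate, Boolean tableVerdict,
      long colWriteId, LongPredicate writeIdValidForQuery) {
    if (!callerSaysAccurate) {
      return false;
    }
    return tableVerdict != null ? tableVerdict
        : writeIdValidForQuery.test(colWriteId);
  }
}
```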
- Boolean isCompliant = null; - if (writeIdList != null) { - MTable table = this.getMTable(catName, dbName, tableName); - isCompliant = !TxnUtils.isTransactionalTable(table.getParameters()) - || (areTxnStatsSupported && isCurrentStatsValidForTheQuery(table, writeIdList, false)); - } + MTable table = this.getMTable(catName, dbName, tableName); + boolean isTxn = TxnUtils.isTransactionalTable(table.getParameters()); ColumnStatistics stats = getTableColumnStatisticsInternal( catName, dbName, tableName, colNames, true, true); - if (stats != null && isCompliant != null) { - stats.setIsStatsCompliant(isCompliant); + if (stats != null) { + for (ColumnStatisticsObj cso : stats.getStatsObj()) { + if (!cso.isIsStatsCompliant()) continue; + cso.setIsStatsCompliant(!isTxn || (areTxnStatsSupported && writeIdList != null + && isCurrentStatsValidForTheQuery(conf, true, cso.getWriteId(), writeIdList, false))); + } } return stats; } @@ -8748,29 +8827,20 @@ protected ColumnStatistics getJdoResult( if (partNames == null && partNames.isEmpty()) { return null; } + MTable table = getMTable(catName, dbName, tableName); + boolean isTxn = TxnUtils.isTransactionalTable(table.getParameters()); List allStats = getPartitionColumnStatisticsInternal( catName, dbName, tableName, partNames, colNames, true, true); - if (writeIdList != null) { - if (!areTxnStatsSupported) { - for (ColumnStatistics cs : allStats) { - cs.setIsStatsCompliant(false); - } - } else { - // TODO: this could be improved to get partitions in bulk - for (ColumnStatistics cs : allStats) { - MPartition mpart = getMPartition(catName, dbName, tableName, - Warehouse.getPartValuesFromPartName(cs.getStatsDesc().getPartName())); - if (mpart == null - || !isCurrentStatsValidForTheQuery(mpart, writeIdList, false)) { - if (mpart != null) { - LOG.debug("The current metastore transactional partition column statistics for {}.{}.{} " - + "(write ID {}) are not valid for current query ({} {})", dbName, tableName, - mpart.getPartitionName(), mpart.getWriteId(), writeIdList); - } - cs.setIsStatsCompliant(false); - } else { - cs.setIsStatsCompliant(true); - } + for (ColumnStatistics cs : allStats) { + for (ColumnStatisticsObj obj : cs.getStatsObj()) { + if (!obj.isIsStatsCompliant()) continue; // Don't check if it's already invalid. + if (!isTxn) { + obj.setIsStatsCompliant(true); + } else if (!areTxnStatsSupported || writeIdList == null) { + obj.setIsStatsCompliant(false); + } else { + obj.setIsStatsCompliant(isCurrentStatsValidForTheQuery(conf, + obj.isSetIsStatsCompliant() && obj.isIsStatsCompliant(), obj.getWriteId(), writeIdList, false)); } } } @@ -8829,39 +8899,33 @@ protected ColumnStatistics getJdoResult( public AggrStats get_aggr_stats_for(String catName, String dbName, String tblName, final List partNames, final List colNames, String writeIdList) throws MetaException, NoSuchObjectException { - // If the current stats in the metastore doesn't comply with - // the isolation level of the query, return null. 
- if (writeIdList != null) { - if (partNames == null && partNames.isEmpty()) { - return null; - } + if (partNames == null || partNames.isEmpty()) { + return null; + } - MTable table = getMTable(catName, dbName, tblName); - boolean isTxn = TxnUtils.isTransactionalTable(table.getParameters()); - if (isTxn && !areTxnStatsSupported) { + MTable tbl = getMTable(catName, dbName, tblName); + if (TxnUtils.isTransactionalTable(tbl.getParameters())) { + if (!areTxnStatsSupported || writeIdList == null) { return null; } - - // Loop through the given "partNames" list - // checking isolation-level-compliance of each partition column stats. - for (String partName : partNames) { - MPartition mpart = getMPartition( - catName, dbName, tblName, Warehouse.getPartValuesFromPartName(partName)); - if (!isCurrentStatsValidForTheQuery(mpart, writeIdList, false)) { - LOG.debug("The current metastore transactional partition column statistics " + - "for " + dbName + "." + tblName + "." + mpart.getPartitionName() + " is not valid " + - "for the current query."); - return null; - } - } } - return get_aggr_stats_for(catName, dbName, tblName, partNames, colNames); + return getAggrStatsInternal(catName, dbName, tblName, partNames, colNames, writeIdList); } @Override public AggrStats get_aggr_stats_for(String catName, String dbName, String tblName, final List partNames, final List colNames) throws MetaException, NoSuchObjectException { + MTable tbl = getMTable(catName, dbName, tblName); + if (TxnUtils.isTransactionalTable(tbl.getParameters())) { + return null; + } + return getAggrStatsInternal(catName, dbName, tblName, partNames, colNames, null); + } + + private AggrStats getAggrStatsInternal(String catName, String dbName, + String tblName, final List partNames, final List colNames, String writeIdList) + throws MetaException, NoSuchObjectException { final boolean useDensityFunctionForNDVEstimation = MetastoreConf.getBoolVar(getConf(), ConfVars.STATS_NDV_DENSITY_FUNCTION); final double ndvTuner = MetastoreConf.getDoubleVar(getConf(), ConfVars.STATS_NDV_TUNER); @@ -8871,7 +8935,7 @@ public AggrStats get_aggr_stats_for(String catName, String dbName, String tblNam protected AggrStats getSqlResult(GetHelper ctx) throws MetaException { return directSql.aggrColStatsForPartitions(catName, dbName, tblName, partNames, - colNames, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector); + colNames, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector, writeIdList); } @Override protected AggrStats getJdoResult(GetHelper ctx) @@ -12433,7 +12497,7 @@ public int deleteRuntimeStats(int maxRetainSecs) throws MetaException { */ private boolean isCurrentStatsValidForTheQuery(MTable tbl, String queryValidWriteIdList, boolean isCompleteStatsWriter) throws MetaException { - return isCurrentStatsValidForTheQuery(conf, tbl.getParameters(), tbl.getWriteId(), + return isCurrentStatsValidForTheQuery(conf, tbl.areStatsAccurate(), tbl.getWriteId(), queryValidWriteIdList, isCompleteStatsWriter); } @@ -12454,25 +12518,25 @@ private boolean isCurrentStatsValidForTheQuery(MTable tbl, String queryValidWrit private boolean isCurrentStatsValidForTheQuery(MPartition part, String queryValidWriteIdList, boolean isCompleteStatsWriter) throws MetaException { - return isCurrentStatsValidForTheQuery(conf, part.getParameters(), part.getWriteId(), + return isCurrentStatsValidForTheQuery(conf, part.areStatsAccurate(), part.getWriteId(), queryValidWriteIdList, isCompleteStatsWriter); } private boolean isCurrentStatsValidForTheQuery(Partition part, long 
partWriteId, String queryValidWriteIdList, boolean isCompleteStatsWriter) throws MetaException { - return isCurrentStatsValidForTheQuery(conf, part.getParameters(), partWriteId, + return isCurrentStatsValidForTheQuery(conf, part.isIsStatsCompliant(), partWriteId, queryValidWriteIdList, isCompleteStatsWriter); } // TODO: move to somewhere else public static boolean isCurrentStatsValidForTheQuery(Configuration conf, - Map statsParams, long statsWriteId, String queryValidWriteIdList, + boolean areStatsAccurate, long statsWriteId, String queryValidWriteIdList, boolean isCompleteStatsWriter) throws MetaException { // Note: can be changed to debug/info to verify the calls. - LOG.debug("isCurrentStatsValidForTheQuery with stats write ID {}; query {}; writer: {} params {}", - statsWriteId, queryValidWriteIdList, isCompleteStatsWriter, statsParams); + LOG.debug("isCurrentStatsValidForTheQuery with stats write ID {}; query {}; writer: {} accurate {}", + statsWriteId, queryValidWriteIdList, isCompleteStatsWriter, areStatsAccurate); // return true since the stats does not seem to be transactional. if (statsWriteId < 1) { return true; @@ -12480,7 +12544,7 @@ public static boolean isCurrentStatsValidForTheQuery(Configuration conf, // This COLUMN_STATS_ACCURATE(CSA) state checking also includes the case that the stats is // written by an aborted transaction but TXNS has no entry for the transaction // after compaction. Don't check for a complete stats writer - it may replace invalid stats. - if (!isCompleteStatsWriter && !StatsSetupConst.areBasicStatsUptoDate(statsParams)) { + if (!isCompleteStatsWriter && !areStatsAccurate) { return false; } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java index 8d647a0f6a11aea3785bf2f577caa1c7cb4b50ad..47e2fd79bc3e00ae2aa6a28a28011619f51e0fbb 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -1703,8 +1703,8 @@ void alterSchemaVersion(SchemaVersionDescriptor version, SchemaVersion newVersio List getAllTableNamesForStats() throws MetaException, NoSuchObjectException; - Map> getPartitionColsWithStats(String catName, String dbName, - String tableName) throws MetaException, NoSuchObjectException; + Map> getPartitionColsWithAccurateStats(String catName, String dbName, + String tableName, String validWriteIds, boolean isAccurate) throws MetaException, NoSuchObjectException; /** * Remove older notification events. 
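Since isCurrentStatsValidForTheQuery is now the single arbiter for the boolean flag, a simplified model of it may help. The sketch reduces the query snapshot to a high-water mark plus a set of open or aborted write IDs; the real method parses queryValidWriteIdList into a ValidWriteIdList and is more involved.

```java
import java.util.Set;

public class StatsValiditySketch {
  static boolean isCurrentStatsValid(boolean areStatsAccurate, long statsWriteId,
      long highWatermark, Set<Long> openOrAbortedIds, boolean isCompleteStatsWriter) {
    if (statsWriteId < 1) {
      return true;  // no write ID: the stats are not transactional
    }
    // A reader needs the accurate flag to be set; a complete-stats writer may
    // overwrite stale stats, so it skips this check (as in the code above).
    if (!isCompleteStatsWriter && !areStatsAccurate) {
      return false;
    }
    // Valid iff the writer of the stats is visible in the query snapshot.
    return statsWriteId <= highWatermark && !openOrAbortedIds.contains(statsWriteId);
  }
}
```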
@@ -1719,4 +1719,17 @@ void alterSchemaVersion(SchemaVersionDescriptor version, SchemaVersion newVersio * @param tableName the name of the table for which the dump is being taken */ List getAllWriteEventInfo(long txnId, String dbName, String tableName) throws MetaException; + + Table alterTableBasicStats(String catName, String dbname, String name, + Map stats, boolean isAccurate, long writeId, + String validWriteIds) throws MetaException; + + Partition alterPartitionBasicStats(String catName, String dbname, + String name, List partVals, Map stats, + boolean isAccurate, long writeId, String validWriteIds) + throws MetaException; + + Map invalidateAllColumnStatistics(String catName, + String dbName, String tblName, List partNames, long writeId) + throws MetaException, NoSuchObjectException; } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index 7a0b21b2580d8bb9b256dbc698f125ed15ccdcd3..9df7b224a9f013eb5c0e74ff484e50c7568a4463 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -133,6 +133,8 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl public static void setFieldsIntoOldStats( MTableColumnStatistics mStatsObj, MTableColumnStatistics oldStatsObj) { + oldStatsObj.setWriteId(mStatsObj.getWriteId()); + oldStatsObj.setAreStatsAccurate(mStatsObj.areStatsAccurate()); if (mStatsObj.getAvgColLen() != null) { oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); } @@ -177,6 +179,8 @@ public static void setFieldsIntoOldStats( public static void setFieldsIntoOldStats( MPartitionColumnStatistics mStatsObj, MPartitionColumnStatistics oldStatsObj) { + oldStatsObj.setWriteId(mStatsObj.getWriteId()); + oldStatsObj.setAreStatsAccurate(mStatsObj.areStatsAccurate()); if (mStatsObj.getAvgColLen() != null) { oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); } @@ -224,6 +228,8 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); statsObj.setColType(mStatsObj.getColType()); statsObj.setColName(mStatsObj.getColName()); + statsObj.setIsStatsCompliant(mStatsObj.areStatsAccurate()); + statsObj.setWriteId(mStatsObj.getWriteId()); String colType = mStatsObj.getColType().toLowerCase(); ColumnStatisticsData colStatsData = new ColumnStatisticsData(); @@ -401,6 +407,8 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); statsObj.setColType(mStatsObj.getColType()); statsObj.setColName(mStatsObj.getColName()); + statsObj.setIsStatsCompliant(mStatsObj.areStatsAccurate()); + statsObj.setWriteId(mStatsObj.getWriteId()); String colType = mStatsObj.getColType().toLowerCase(); ColumnStatisticsData colStatsData = new ColumnStatisticsData(); diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index f73047f9ff293789df3f03d35ac4140eaa154145..8f8273edf806e5716520e32f1b2ccb1da36154a5 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ 
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -39,7 +39,6 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.DatabaseName; -import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.metastore.Deadline; import org.apache.hadoop.hive.metastore.FileMetadataHandler; @@ -870,9 +869,13 @@ public Table getTable(String catName, String dbName, String tblName, tblsPendingPrewarm.prioritizeTableForPrewarm(tblName); return rawStore.getTable(catName, dbName, tblName, validWriteIds); } - if (validWriteIds != null) { - tbl.setParameters(adjustStatsParamsForGet(tbl.getParameters(), - tbl.getParameters(), tbl.getWriteId(), validWriteIds)); + if (tbl.isIsStatsCompliant() && TxnUtils.isTransactionalTable(tbl)) { + if (!areTxnStatsSupported || validWriteIds == null || !ObjectStore.isCurrentStatsValidForTheQuery( + conf, true, tbl.getWriteId(), validWriteIds, false)) { + // Clone to avoid affecting the cached object. + tbl = new Table(tbl); + tbl.setIsStatsCompliant(false); + } } tbl.unsetPrivileges(); @@ -954,15 +957,19 @@ public Partition getPartition(String catName, String dbName, String tblName, return rawStore.getPartition( catName, dbName, tblName, part_vals, validWriteIds); } - if (validWriteIds != null) { - Table table = sharedCache.getTableFromCache(catName, dbName, tblName); - if (table == null) { - // The table containing the partition is not yet loaded in cache - return rawStore.getPartition( - catName, dbName, tblName, part_vals, validWriteIds); + Table table = sharedCache.getTableFromCache(catName, dbName, tblName); + if (table == null) { + // The table containing the partition is not yet loaded in cache + return rawStore.getPartition( + catName, dbName, tblName, part_vals, validWriteIds); + } + if (TxnUtils.isTransactionalTable(table) && part.isIsStatsCompliant()) { + if (!areTxnStatsSupported || validWriteIds == null || !ObjectStore.isCurrentStatsValidForTheQuery( + conf, true, part.getWriteId(), validWriteIds, false)) { + // Clone to avoid affecting the cached object. + part = new Partition(part); + part.setIsStatsCompliant(false); } - part.setParameters(adjustStatsParamsForGet(table.getParameters(), - part.getParameters(), part.getWriteId(), validWriteIds)); } return part; @@ -1634,39 +1641,24 @@ public Partition getPartitionWithAuth(String catName, String dbName, String tblN return partitions; } - // Note: ideally this should be above both CachedStore and ObjectStore. - private Map adjustStatsParamsForGet(Map tableParams, - Map params, long statsWriteId, String validWriteIds) throws MetaException { - if (!TxnUtils.isTransactionalTable(tableParams)) return params; // Not a txn table. - if (areTxnStatsSupported && ((validWriteIds == null) - || ObjectStore.isCurrentStatsValidForTheQuery( - conf, params, statsWriteId, validWriteIds, false))) { - // Valid stats are supported for txn tables, and either no verification was requested by the - // caller, or the verification has succeeded. - return params; - } - // Clone the map to avoid affecting the cached value. - params = new HashMap<>(params); - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); - return params; - } - // Note: ideally this should be above both CachedStore and ObjectStore. 
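The CachedStore changes above (and adjustColStatForGet just below) repeatedly clone before downgrading compliance, because cached objects are shared across concurrent requests. The pattern in isolation; CachedEntry is a toy stand-in for Table, Partition, or ColumnStatisticsObj:

```java
import java.util.ArrayList;
import java.util.List;

public class CloneBeforeMutate {
  static final class CachedEntry {
    boolean statsCompliant = true;
    CachedEntry() {}
    CachedEntry(CachedEntry other) { this.statsCompliant = other.statsCompliant; }
  }

  // A query-specific compliance downgrade must be applied to a copy,
  // never to the cached instance itself.
  static List<CachedEntry> adjustForQuery(List<CachedEntry> cached, boolean queryCanUseStats) {
    List<CachedEntry> result = new ArrayList<>(cached.size());
    for (CachedEntry e : cached) {
      if (e.statsCompliant && !queryCanUseStats) {
        CachedEntry copy = new CachedEntry(e); // leave the cache untouched
        copy.statsCompliant = false;
        result.add(copy);
      } else {
        result.add(e);                         // safe to share as-is
      }
    }
    return result;
  }
}
```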
  private ColumnStatistics adjustColStatForGet(Map<String, String> tableParams,
-      Map<String, String> params, ColumnStatistics colStat, long statsWriteId,
-      String validWriteIds) throws MetaException {
-    colStat.setIsStatsCompliant(true);
+      ColumnStatistics colStat, long statsWriteId, String validWriteIds) throws MetaException {
     if (!TxnUtils.isTransactionalTable(tableParams)) return colStat; // Not a txn table.
-    if (areTxnStatsSupported && ((validWriteIds == null)
-        || ObjectStore.isCurrentStatsValidForTheQuery(
-            conf, params, statsWriteId, validWriteIds, false))) {
-      // Valid stats are supported for txn tables, and either no verification was requested by the
-      // caller, or the verification has succeeded.
-      return colStat;
-    }
-    // Don't clone; ColStats objects are not cached, only their parts.
-    colStat.setIsStatsCompliant(false);
+    for (int i = 0; i < colStat.getStatsObj().size(); ++i) {
+      ColumnStatisticsObj cso = colStat.getStatsObj().get(i);
+      if (!cso.isIsStatsCompliant()) continue; // No need to re-check if it's already incorrect.
+      boolean isCompliant = areTxnStatsSupported && ((validWriteIds != null)
+          && ObjectStore.isCurrentStatsValidForTheQuery(
+              conf, true, cso.getWriteId(), validWriteIds, false));
+      if (!isCompliant) {
+        // Clone to avoid affecting the cached object.
+        ColumnStatisticsObj clone = new ColumnStatisticsObj(cso);
+        clone.setIsStatsCompliant(false);
+        colStat.getStatsObj().set(i, clone);
+      }
+    }
     return colStat;
   }
@@ -1724,7 +1716,7 @@ public ColumnStatistics getTableColumnStatistics(
     ColumnStatisticsDesc csd = new ColumnStatisticsDesc(true, dbName, tblName);
     List<ColumnStatisticsObj> colStatObjs = sharedCache.getTableColStatsFromCache(catName, dbName, tblName, colNames);
-    return adjustColStatForGet(table.getParameters(), table.getParameters(),
+    return adjustColStatForGet(table.getParameters(),
         new ColumnStatistics(csd, colStatObjs), table.getWriteId(), validWriteIds);
   }
@@ -2593,8 +2585,33 @@ public int deleteRuntimeStats(int maxRetainSecs) throws MetaException {
   }

   @Override
-  public Map<String, List<String>> getPartitionColsWithStats(String catName,
-      String dbName, String tableName) throws MetaException, NoSuchObjectException {
-    return rawStore.getPartitionColsWithStats(catName, dbName, tableName);
+  public Map<String, List<String>> getPartitionColsWithAccurateStats(String catName,
+      String dbName, String tableName, String validWriteId, boolean isAccurate) throws MetaException, NoSuchObjectException {
+    return rawStore.getPartitionColsWithAccurateStats(catName, dbName, tableName, validWriteId, isAccurate);
+  }
+
+  @Override
+  public Table alterTableBasicStats(String catName, String dbname, String name,
+      Map<String, String> stats, boolean isAccurate, long writeId, String validWriteIds)
+      throws MetaException {
+    // TODO# propagate the update into the cache; for now, pass through to the raw store.
+    return rawStore.alterTableBasicStats(catName, dbname, name, stats, isAccurate, writeId, validWriteIds);
+  }
+
+  @Override
+  public Partition alterPartitionBasicStats(String catName, String dbname,
+      String name, List<String> partVals, Map<String, String> stats,
+      boolean isAccurate, long writeId, String validWriteIds)
+      throws MetaException {
+    // TODO# propagate the update into the cache; for now, pass through to the raw store.
+    return rawStore.alterPartitionBasicStats(catName, dbname, name, partVals, stats, isAccurate, writeId, validWriteIds);
+  }
+
+  @Override
+  public Map invalidateAllColumnStatistics(String catName,
+      String dbName, String tblName, List<String> partNames, long writeId)
+      throws MetaException, NoSuchObjectException {
+    // TODO# invalidate the cached stats too; for now, pass through to the raw store.
+    return rawStore.invalidateAllColumnStatistics(catName, dbName, tblName, partNames, writeId);
+  }
 }
diff --git
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java index c18b4c79bfb2b9f3413884232d34f3ff84f8c495..9327b806bb37760bb8d548235bd9dde9780afbe2 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BinaryColumnStatsAggregator.java @@ -43,7 +43,9 @@ public ColumnStatisticsObj aggregate(List colStatsWit colType = cso.getColType(); statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso.getStatsData().getSetField()); + statsObj.setIsStatsCompliant(true); } + statsObj.setIsStatsCompliant(statsObj.isIsStatsCompliant() && cso.isIsStatsCompliant()); BinaryColumnStatsData newData = cso.getStatsData().getBinaryStats(); if (aggregateData == null) { aggregateData = newData.deepCopy(); diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java index 76301831802a0dd37134455356c7d4c4dbe15946..f2f616267cbce565b05385af054694bf844530bd 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/BooleanColumnStatsAggregator.java @@ -43,7 +43,9 @@ public ColumnStatisticsObj aggregate(List colStatsWit colType = cso.getColType(); statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso.getStatsData().getSetField()); + statsObj.setIsStatsCompliant(true); } + statsObj.setIsStatsCompliant(statsObj.isIsStatsCompliant() && cso.isIsStatsCompliant()); BooleanColumnStatsData newData = cso.getStatsData().getBooleanStats(); if (aggregateData == null) { aggregateData = newData.deepCopy(); diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java index 7aaab4a6b978adf99121fe807c013b23bdeaf624..e78814de8482918b81e011f335481d4c205759c8 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java @@ -73,6 +73,7 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col ColumnStatisticsData csd = new ColumnStatisticsData(); cso.setColName(colName); cso.setColType(colType); + cso.setIsStatsCompliant(true); // Note: aggregates must update this when aggregating. 
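The aggregator diffs around this point all make the same two-line change: the factory seeds the new ColumnStatisticsObj as compliant, and each per-partition input ANDs its own flag in, so one stale partition taints the whole aggregate. Reduced to its essence; PartStat is a stand-in for the per-partition ColumnStatisticsObj:

```java
import java.util.List;

public class AggregateCompliance {
  static final class PartStat {
    final boolean compliant;
    PartStat(boolean compliant) { this.compliant = compliant; }
  }

  static boolean aggregateIsCompliant(List<PartStat> inputs) {
    boolean compliant = true; // mirrors setIsStatsCompliant(true) on creation
    for (PartStat p : inputs) {
      compliant = compliant && p.compliant; // mirrors the per-input AND
    }
    return compliant;
  }
}
```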
switch (type) { case BOOLEAN_STATS: csd.setBooleanStats(new BooleanColumnStatsData()); diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java index e8ff513f50964cf274801388919b593ac9f89118..a43cbd0ed106f59de6dea3e6822248d82913e1a5 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java @@ -61,7 +61,9 @@ public ColumnStatisticsObj aggregate(List colStatsWit statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso.getStatsData().getSetField()); LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats); + statsObj.setIsStatsCompliant(true); } + statsObj.setIsStatsCompliant(statsObj.isIsStatsCompliant() && cso.isIsStatsCompliant()); DateColumnStatsDataInspector dateColumnStats = (DateColumnStatsDataInspector) cso.getStatsData().getDateStats(); if (dateColumnStats.getNdvEstimator() == null) { diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java index ac7e8e35f9d0fc56540dfa31e2e4f7c311271310..414bdeb076b80bce0c454554e0202154c9ecccd4 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java @@ -64,7 +64,9 @@ public ColumnStatisticsObj aggregate(List colStatsWit cso.getStatsData().getSetField()); LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats); + statsObj.setIsStatsCompliant(true); } + statsObj.setIsStatsCompliant(statsObj.isIsStatsCompliant() && cso.isIsStatsCompliant()); DecimalColumnStatsDataInspector decimalColumnStatsData = (DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats(); if (decimalColumnStatsData.getNdvEstimator() == null) { diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java index ece77dd51bbe2313030925c6bf99532a63c0cbb2..186d317a15bd8280cb6df497d363d038b7e74905 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java @@ -61,7 +61,9 @@ public ColumnStatisticsObj aggregate(List colStatsWit cso.getStatsData().getSetField()); LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats); + statsObj.setIsStatsCompliant(true); } + statsObj.setIsStatsCompliant(statsObj.isIsStatsCompliant() && cso.isIsStatsCompliant()); DoubleColumnStatsDataInspector doubleColumnStatsData = 
(DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats(); if (doubleColumnStatsData.getNdvEstimator() == null) { diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java index e6823d342af30973d5462bd26fd6e4b7cf2eec8e..3395d9501ae729113fc4573c9614a5c02955c36d 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java @@ -62,7 +62,9 @@ public ColumnStatisticsObj aggregate(List colStatsWit cso.getStatsData().getSetField()); LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats); + statsObj.setIsStatsCompliant(true); } + statsObj.setIsStatsCompliant(statsObj.isIsStatsCompliant() && cso.isIsStatsCompliant()); LongColumnStatsDataInspector longColumnStatsData = (LongColumnStatsDataInspector) cso.getStatsData().getLongStats(); if (longColumnStatsData.getNdvEstimator() == null) { diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java index 9537647503d07cbd56b78c53f47a0c3e6e27bd30..d6172878463b630dadca4420b4fb55371bb65b91 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java @@ -62,7 +62,10 @@ public ColumnStatisticsObj aggregate(List colStatsWit cso.getStatsData().getSetField()); LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats); + statsObj.setIsStatsCompliant(true); } + // TODO# do all these places need to do a separate writeID check? 
+ statsObj.setIsStatsCompliant(statsObj.isIsStatsCompliant() && cso.isIsStatsCompliant()); StringColumnStatsDataInspector stringColumnStatsData = (StringColumnStatsDataInspector) cso.getStatsData().getStringStats(); if (stringColumnStatsData.getNdvEstimator() == null) { diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartition.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartition.java index 267c9e8e5acd7f3b3666f9a68780091c5e792380..a534cd0b2a9e6185dcfe70c84472c6d83f8c3566 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartition.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartition.java @@ -24,16 +24,17 @@ public class MPartition { private String partitionName; // partitionname ==> (key=value/)*(key=value) - private MTable table; + private MTable table; private List values; private int createTime; private int lastAccessTime; private MStorageDescriptor sd; private Map parameters; private long writeId; - + private boolean areStatsAccurate; + public MPartition() {} - + /** * @param partitionName * @param table @@ -152,6 +153,7 @@ public void setCreateTime(int createTime) { this.createTime = createTime; } + public long getWriteId() { return writeId; } @@ -159,4 +161,12 @@ public long getWriteId() { public void setWriteId(long writeId) { this.writeId = writeId; } + + public boolean areStatsAccurate() { + return areStatsAccurate; + } + + public void setAreStatsAccurate(boolean areStatsAccurate) { + this.areStatsAccurate = areStatsAccurate; + } } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java index 50d9c5b0cf8d01ad3bbb3846162e4423724c9a47..c19d196fa063255f27596241b4eb22b00f5d65d1 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java @@ -56,6 +56,10 @@ private Long numFalses; private long lastAnalyzed; + private long writeId; + + private boolean areStatsAccurate; + public MPartitionColumnStatistics() {} public String getTableName() { @@ -278,4 +282,20 @@ public void setDecimalHighValue(String decimalHighValue) { public void setBitVector(byte[] bitVector) { this.bitVector = bitVector; } + + public long getWriteId() { + return writeId; + } + + public void setWriteId(long writeId) { + this.writeId = writeId; + } + + public boolean areStatsAccurate() { + return areStatsAccurate; + } + + public void setAreStatsAccurate(boolean areStatsAccurate) { + this.areStatsAccurate = areStatsAccurate; + } } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java index deeb97133d4aeb362c892e4a08346189eec26b09..33dc61948f7d9928bd5acc19c8ef10e9cf61a712 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTable.java @@ -23,7 +23,7 @@ import java.util.Map; public class MTable { - + 
private String tableName; private MDatabase database; private MStorageDescriptor sd; @@ -39,6 +39,7 @@ private boolean rewriteEnabled; private String tableType; private long writeId; + private boolean areStatsAccurate; public MTable() {} @@ -280,4 +281,12 @@ public long getWriteId() { public void setWriteId(long writeId) { this.writeId = writeId; } + + public boolean areStatsAccurate() { return areStatsAccurate; } + + public void setAreStatsAccurate(boolean areStatsAccurate) { this.areStatsAccurate = areStatsAccurate; } } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java index 731cd6f7facb5ba64093be8d2319e9ed47ae103c..4a8c11c64c76e11d72c764aedb21796eb5008435 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java @@ -53,6 +53,8 @@ private Long numTrues; private Long numFalses; private long lastAnalyzed; + private long writeId; + private boolean areStatsAccurate; public MTableColumnStatistics() {} @@ -269,4 +271,21 @@ public void setDecimalHighValue(String decimalHighValue) { public void setBitVector(byte[] bitVector) { this.bitVector = bitVector; } + + + public long getWriteId() { return writeId; } + + public void setWriteId(long writeId) { this.writeId = writeId; } + + public boolean areStatsAccurate() { return areStatsAccurate; } + + public void setAreStatsAccurate(boolean areStatsAccurate) { this.areStatsAccurate = areStatsAccurate; } } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java index 1f559e95bb29bb54f5e48d48a376f2c4385eb65c..e80223469d2d1ba36b546d6da6c2ffe0c173bda2 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.metastore.txn; -import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.classification.RetrySemantics; import org.apache.hadoop.hive.metastore.api.CompactionType; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -328,7 +327,7 @@ public void markCompacted(CompactionInfo info) throws MetaException { /** * This will remove an entry from the queue after * it has been compacted. - * + * * @param info info on the compaction entry to remove */ @Override @@ -993,7 +992,7 @@ private int getFailedCompactionRetention() { * User initiated compactions don't do this check. * * Do we allow compacting whole table (when it's partitioned)? No, though perhaps we should. - * That would be a meta operations, i.e. first find all partitions for this table (which have + * That would be a meta operation, i.e. first find all partitions for this table (which have txn info) and schedule each compaction separately. This avoids complications in this logic.
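+ * (Presumably each partition would then get its own compaction queue entry and would pass through this retention check independently.)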
*/ @Override diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index 7cdcd626a71a5d1a82306426e60a520acd40d31d..696dbaa733469285a9abafba4bf577028720c749 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -683,9 +683,15 @@ public static boolean isFastStatsSame(Partition oldPart, Partition newPart) { * @param forceRecompute Recompute stats even if the passed Table already has * these parameters set */ - public static void updateTableStatsSlow(Database db, Table tbl, Warehouse wh, + public static void updateTableFsStatsSlow(Database db, Table tbl, Warehouse wh, boolean newDir, boolean forceRecompute, EnvironmentContext environmentContext) throws MetaException { + + // Note: 1) This updates FS stats only; those are never used for query results, so it's ok to + // change them for transactional tables without txn context. + // 2) This also never alters the stats-accurate state, because it doesn't touch the other stats, + // e.g. row counts, that are actually used for query results. + // DO_NOT_UPDATE_STATS is supposed to be a transient parameter that is only passed via RPC // We want to prevent this property from being persisted. // // This problem was introduced by HIVE-10228. A better approach would be to pass the property // via the environment context. + // TODO: pass it via the request object, not the environment context. Map<String, String> params = tbl.getParameters(); boolean updateStats = true; if ((params != null) && params.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)) { @@ -722,14 +729,7 @@ public static void updateTableStatsSlow(Database db, Table tbl, Warehouse wh, populateQuickStats(fileStatus, params); LOG.info("Updated size of table {} to {}", tbl.getTableName(), params.get(StatsSetupConst.TOTAL_SIZE)); - if (environmentContext != null - && environmentContext.isSetProperties() - && StatsSetupConst.TASK.equals(environmentContext.getProperties().get( - StatsSetupConst.STATS_GENERATED))) { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); - } else { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); - } + // Note: DO NOT set stats to accurate here. See above. } /** This method is invalid for MM and ACID tables unless fileStatus comes from AcidUtils. */ @@ -765,19 +765,10 @@ public static boolean areSameColumns(List<FieldSchema> oldCols, List<FieldSchema> newCols) { - public static void updateBasicState(EnvironmentContext environmentContext, Map<String, String> - params) { - if (params == null) { - return; - } - if (environmentContext != null - && environmentContext.isSetProperties() - && StatsSetupConst.TASK.equals(environmentContext.getProperties().get( - StatsSetupConst.STATS_GENERATED))) { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); - } else { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); - } + // Another legacy method that needs to be converted to not use the EnvironmentContext (EC).
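+ // True iff the EC marks the stats as generated by a Hive task (STATS_GENERATED == TASK); + // this is the same check the removed updateBasicState used before marking stats accurate.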
+ public static boolean areStatsGenerated(EnvironmentContext ec) { + return ec != null && ec.isSetProperties() + && StatsSetupConst.TASK.equals(ec.getProperties().get(StatsSetupConst.STATS_GENERATED)); } /** @@ -828,7 +819,7 @@ public static boolean updatePartitionStatsFast(PartitionSpecProxy.PartitionIterator // TODO: this is invalid for ACID tables, and we cannot access AcidUtils here. populateQuickStats(fileStatus, params); LOG.warn("Updated size to " + params.get(StatsSetupConst.TOTAL_SIZE)); - updateBasicState(environmentContext, params); + // Note: we don't set stats to valid, because we only update a subset of the stats. } part.setParameters(params); return true; @@ -1043,24 +1034,11 @@ public static boolean partitionNameHasValidCharacters(List<String> partVals, return getPartitionValWithInvalidCharacter(partVals, partitionValidationPattern) == null; } - public static void getMergableCols(ColumnStatistics csNew, Map<String, String> parameters) { - List<ColumnStatisticsObj> list = new ArrayList<>(); - for (int index = 0; index < csNew.getStatsObj().size(); index++) { - ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); - // canColumnStatsMerge guarantees that it is accurate before we do merge - if (StatsSetupConst.canColumnStatsMerge(parameters, statsObjNew.getColName())) { - list.add(statsObjNew); - } - // in all the other cases, we can not merge - } - csNew.setStatsObj(list); - } - // this function will merge csOld into csNew. - public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld) - throws InvalidObjectException { + public static void mergeColStats( + ColumnStatistics csNew, ColumnStatistics csOld) throws InvalidObjectException { List<ColumnStatisticsObj> list = new ArrayList<>(); - if (csNew.getStatsObj().size() != csOld.getStatsObjSize()) { + if (csOld != null && csNew.getStatsObj().size() != csOld.getStatsObjSize()) { // Some of the columns' stats are missing // This implies partition schema has changed. We will merge columns // present in both, overwrite stats for columns absent in metastore and @@ -1072,8 +1050,10 @@ // In this case, we have to find out which columns can be merged. Map<String, ColumnStatisticsObj> map = new HashMap<>(); // We build a hash map from colName to object for old ColumnStats. - for (ColumnStatisticsObj obj : csOld.getStatsObj()) { - map.put(obj.getColName(), obj); + if (csOld != null) { + for (ColumnStatisticsObj obj : csOld.getStatsObj()) { + map.put(obj.getColName(), obj); + } } for (int index = 0; index < csNew.getStatsObj().size(); index++) { ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); @@ -1089,6 +1069,13 @@ statsObjOld); merger.merge(statsObjNew, statsObjOld); } + // TODO: the old code seemingly assumed that stats will be valid if the pre-merge stats are absent. + // That doesn't seem to make sense... when called with merge, the stats would be partial. + // So, if we take the old stats state for some cols into account at all, and the state is + // absent for other columns, we should assume that the "old state" for them is invalid. + // TODO## make sure that the callers actually set to compliant + statsObjNew.setIsStatsCompliant( + statsObjNew.isIsStatsCompliant() && statsObjOld != null && statsObjOld.isIsStatsCompliant()); // If statsObjOld is not found, we just use statsObjNew as it is accurate.
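+ // (The comment above is now stale: with the compliance check added here, a missing + // statsObjOld marks the merged object as non-compliant rather than accurate.)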
list.add(statsObjNew); } diff --git standalone-metastore/metastore-server/src/main/resources/package.jdo standalone-metastore/metastore-server/src/main/resources/package.jdo index 2a5f016b1fc4cef14edb830300722d70b0e6c513..a27febefb7fca488cce4a4bde2995b3cdfd3e743 100644 --- standalone-metastore/metastore-server/src/main/resources/package.jdo +++ standalone-metastore/metastore-server/src/main/resources/package.jdo @@ -213,6 +213,9 @@ + <field name="areStatsAccurate"> + <column name="STATS_ACCURATE"/> + </field> @@ -495,6 +498,9 @@ + <field name="areStatsAccurate"> + <column name="STATS_ACCURATE"/> + </field> @@ -995,6 +1001,12 @@ + <field name="writeId"> + <column name="WRITE_ID"/> + </field> + <field name="areStatsAccurate"> + <column name="STATS_ACCURATE"/> + </field> @@ -1065,7 +1077,14 @@ + + <field name="writeId"> + <column name="WRITE_ID"/> + </field> + <field name="areStatsAccurate"> + <column name="STATS_ACCURATE"/> + </field> diff --git standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-4.0.0.derby.sql standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-4.0.0.derby.sql index c889bbdf96b887b29be858e41ee854f0731cd5cd..f7352f683259cb9562f94e005cf654ae8cda6757 100644 --- standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-4.0.0.derby.sql +++ standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-4.0.0.derby.sql @@ -47,7 +47,7 @@ CREATE TABLE "APP"."IDXS" ("INDEX_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT CREATE TABLE "APP"."INDEX_PARAMS" ("INDEX_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" VARCHAR(4000)); -CREATE TABLE "APP"."PARTITIONS" ("PART_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "LAST_ACCESS_TIME" INTEGER NOT NULL, "PART_NAME" VARCHAR(767), "SD_ID" BIGINT, "TBL_ID" BIGINT, "WRITE_ID" BIGINT DEFAULT 0); +CREATE TABLE "APP"."PARTITIONS" ("PART_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "LAST_ACCESS_TIME" INTEGER NOT NULL, "PART_NAME" VARCHAR(767), "SD_ID" BIGINT, "TBL_ID" BIGINT, "WRITE_ID" BIGINT DEFAULT 0, "STATS_ACCURATE" CHAR(1) NOT NULL DEFAULT 'N'); CREATE TABLE "APP"."SERDES" ("SERDE_ID" BIGINT NOT NULL, "NAME" VARCHAR(128), "SLIB" VARCHAR(4000), "DESCRIPTION" VARCHAR(4000), "SERIALIZER_CLASS" VARCHAR(4000), "DESERIALIZER_CLASS" VARCHAR(4000), SERDE_TYPE INTEGER); @@ -75,7 +75,7 @@ CREATE TABLE "APP"."COLUMNS" ("SD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(256), " CREATE TABLE "APP"."ROLES" ("ROLE_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "OWNER_NAME" VARCHAR(128), "ROLE_NAME" VARCHAR(128)); -CREATE TABLE "APP"."TBLS" ("TBL_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "DB_ID" BIGINT, "LAST_ACCESS_TIME" INTEGER NOT NULL, "OWNER" VARCHAR(767), "OWNER_TYPE" VARCHAR(10), "RETENTION" INTEGER NOT NULL, "SD_ID" BIGINT, "TBL_NAME" VARCHAR(256), "TBL_TYPE" VARCHAR(128), "VIEW_EXPANDED_TEXT" LONG VARCHAR, "VIEW_ORIGINAL_TEXT" LONG VARCHAR, "IS_REWRITE_ENABLED" CHAR(1) NOT NULL DEFAULT 'N', "WRITE_ID" BIGINT DEFAULT 0); +CREATE TABLE "APP"."TBLS" ("TBL_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "DB_ID" BIGINT, "LAST_ACCESS_TIME" INTEGER NOT NULL, "OWNER" VARCHAR(767), "OWNER_TYPE" VARCHAR(10), "RETENTION" INTEGER NOT NULL, "SD_ID" BIGINT, "TBL_NAME" VARCHAR(256), "TBL_TYPE" VARCHAR(128), "VIEW_EXPANDED_TEXT" LONG VARCHAR, "VIEW_ORIGINAL_TEXT" LONG VARCHAR, "IS_REWRITE_ENABLED" CHAR(1) NOT NULL DEFAULT 'N', "WRITE_ID" BIGINT DEFAULT 0, "STATS_ACCURATE" CHAR(1) NOT NULL DEFAULT 'N'); CREATE TABLE "APP"."PARTITION_KEYS" ("TBL_ID" BIGINT NOT NULL, "PKEY_COMMENT" VARCHAR(4000), "PKEY_NAME" VARCHAR(128) NOT NULL, "PKEY_TYPE" VARCHAR(767) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); @@ -106,7 +106,9 @@ CREATE TABLE "APP"."TAB_COL_STATS"( "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "TBL_ID" BIGINT NOT NULL, - "BIT_VECTOR" BLOB + "BIT_VECTOR" BLOB, + "WRITE_ID" BIGINT DEFAULT 0, + "STATS_ACCURATE" CHAR(1) NOT NULL DEFAULT 'N' ); CREATE TABLE
"APP"."TABLE_PARAMS" ("TBL_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" CLOB); @@ -155,7 +157,9 @@ CREATE TABLE "APP"."PART_COL_STATS"( "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, - "PART_ID" BIGINT NOT NULL + "PART_ID" BIGINT NOT NULL, + "WRITE_ID" BIGINT DEFAULT 0, + "STATS_ACCURATE" CHAR(1) NOT NULL DEFAULT 'N' ); CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255)); diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/TestStatsSetupConst.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/TestStatsSetupConst.java deleted file mode 100644 index 24689215c2ff4156df7a9ea16e642c8ba84c54af..0000000000000000000000000000000000000000 --- standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/TestStatsSetupConst.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.common; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; - -import java.util.HashMap; -import java.util.Map; - -import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; -import org.junit.Test; - -import com.google.common.collect.Lists; -import org.junit.experimental.categories.Category; - -@Category(MetastoreUnitTest.class) -public class TestStatsSetupConst { - - @Test - public void testSetBasicStatsState_missesUpgrade() { - Map<String, String> params=new HashMap<>(); - params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, "FALSE"); - StatsSetupConst.setBasicStatsState(params, String.valueOf(true)); - assertEquals("{\"BASIC_STATS\":\"true\"}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); - } - - @Test - public void setColumnStatsState_camelcase() { - Map<String, String> params=new HashMap<>(); - StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo")); - String val1 = params.get(StatsSetupConst.COLUMN_STATS_ACCURATE); - StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo")); - String val2 = params.get(StatsSetupConst.COLUMN_STATS_ACCURATE); - assertEquals(val1, val2); - } - - @Test - public void testSetBasicStatsState_none() { - Map<String, String> params=new HashMap<>(); - StatsSetupConst.setBasicStatsState(params, String.valueOf(true)); - assertEquals("{\"BASIC_STATS\":\"true\"}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); - } - - @Test - public void testSetBasicStatsState_falseIsAbsent() { - Map<String, String> params=new HashMap<>(); - StatsSetupConst.setBasicStatsState(params, String.valueOf(true)); - StatsSetupConst.setBasicStatsState(params, String.valueOf(false)); - assertNull(params.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); - } - - // earlier implementation have quoted boolean values...so the new implementation should preserve this - @Test - public void testStatColumnEntriesCompat() { - Map<String, String> params0=new HashMap<>(); - StatsSetupConst.setBasicStatsState(params0, String.valueOf(true)); - StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo")); - - assertEquals("{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); - } - - @Test - public void testColumnEntries_orderIndependence() { - Map<String, String> params0=new HashMap<>(); - StatsSetupConst.setBasicStatsState(params0, String.valueOf(true)); - StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo","Bar")); - Map<String, String> params1=new HashMap<>(); - StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("Bar","Foo")); - StatsSetupConst.setBasicStatsState(params1, String.valueOf(true)); - - assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); - } - - @Test - public void testColumnEntries_orderIndependence2() { - Map<String, String> params0=new HashMap<>(); - // in case jackson is able to deserialize...it may use a different implementation for the map - which may not preserve order - StatsSetupConst.setBasicStatsState(params0, String.valueOf(true)); - StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year")); - StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year","month")); - Map<String, String> params1=new HashMap<>(); - StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("month","year")); - StatsSetupConst.setBasicStatsState(params1, String.valueOf(true)); - - System.out.println(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); -
assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); - } - - // FIXME: current objective is to keep the previous outputs...but this is possibly bad.. - @Test - public void testColumnEntries_areKept_whenBasicIsAbsent() { - Map<String, String> params=new HashMap<>(); - StatsSetupConst.setBasicStatsState(params, String.valueOf(false)); - StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo")); - assertEquals("{\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE)); - } -} diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java index 09c2509b3d186383f5f7c6285c6212c0b52371fe..be614011812df35b48f461320e3b9f4630cbe0f9 100644 --- standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java +++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java @@ -1260,9 +1260,31 @@ public void cleanWriteNotificationEvents(int olderThan) { } @Override - public Map<String, List<String>> getPartitionColsWithStats(String catName, - String dbName, String tableName) throws MetaException, + public Map<String, List<String>> getPartitionColsWithAccurateStats(String catName, + String dbName, String tableName, String validWriteId, boolean isAccurate) throws MetaException, NoSuchObjectException { return null; } + + @Override + public Table alterTableBasicStats(String catName, String dbname, String name, + Map<String, String> stats, boolean isAccurate, long writeId, + String validWriteIds) throws MetaException { + return null; + } + + @Override + public Partition alterPartitionBasicStats(String catName, String dbname, + String name, List<String> partVals, Map<String, String> stats, + boolean isAccurate, long writeId, String validWriteIds) + throws MetaException { + return null; + } + + @Override + public Map invalidateAllColumnStatistics(String catName, + String dbName, String tblName, List<String> partNames, long writeId) + throws MetaException, NoSuchObjectException { + return null; + } } diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java index 3aebaf34197441895b190f4c9764f509ae13f712..be179bd7ebfde24ce80496291978239d3b7bbc71 100644 --- standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java +++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java @@ -1235,8 +1235,8 @@ public int deleteRuntimeStats(int maxRetainSecs) throws MetaException { } @Override - public Map<String, List<String>> getPartitionColsWithStats(String catName, - String dbName, String tableName) throws MetaException, + public Map<String, List<String>> getPartitionColsWithAccurateStats(String catName, + String dbName, String tableName, String validWriteId, boolean isAccurate) throws MetaException, NoSuchObjectException { return null; } @@ -1249,4 +1249,26 @@ public void cleanWriteNotificationEvents(int olderThan) { public List<WriteEventInfo> getAllWriteEventInfo(long txnId, String dbName, String tableName) throws MetaException { return null; } + + @Override + public Table alterTableBasicStats(String catName, String dbname, String name, + Map<String, String> stats, boolean
isAccurate, long writeId, + String validWriteIds) throws MetaException { + return null; + } + + @Override + public Partition alterPartitionBasicStats(String catName, String dbname, + String name, List<String> partVals, Map<String, String> stats, + boolean isAccurate, long writeId, String validWriteIds) + throws MetaException { + return null; + } + + @Override + public Map invalidateAllColumnStatistics(String catName, + String dbName, String tblName, List<String> partNames, long writeId) + throws MetaException, NoSuchObjectException { + return null; + } } diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java index 34055d2d4d39dc63d505a5ef95d190aa80a49d14..8a41afdce78bc525d2e83d4b27d70d4c786eb44e 100644 --- standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java +++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClientPreCatalog.java @@ -3528,4 +3528,25 @@ public void truncateTable(String dbName, String tableName, List<String> partNames) throws TException { throw new UnsupportedOperationException(); } + + @Override + public void alterTableBasicStats(String catName, String dbName, + String tblName, boolean isValid, Map<String, String> basicStats, + long writeId, String validWriteIds) throws TException { + throw new UnsupportedOperationException(); + } + + @Override + public void alterPartitionBasicStats(String catName, String dbName, + String tblName, String partName, boolean isValid, + Map<String, String> basicStats, long writeId, String validWriteIds) + throws TException { + throw new UnsupportedOperationException(); + } + + @Override + public void invalidateAllColumnStats(String catName, String dbName, + String tableName, String partName, long writeId) { + throw new UnsupportedOperationException(); + } } diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java index d5ae5d1c0d4658335af92e4472bd3985d9f9493f..36e77119658fc05ba648ad09c7656c270f3e17be 100644 --- standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java +++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java @@ -42,12 +42,11 @@ import java.util.List; import java.util.Map; -import static org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE; import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_FILES; import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_ERASURE_CODED_FILES; import static org.apache.hadoop.hive.common.StatsSetupConst.STATS_GENERATED; import static org.apache.hadoop.hive.common.StatsSetupConst.TOTAL_SIZE; -import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.updateTableStatsSlow; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.updateTableFsStatsSlow; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; @@ -158,7 +157,7 @@ public void testUpdateTableStatsSlow_statsUpdated() throws TException { TOTAL_SIZE, String.valueOf(2 * fileLength), NUM_ERASURE_CODED_FILES, "1" ); - updateTableStatsSlow(db, tbl, wh, false, false, null); + updateTableFsStatsSlow(db,
tbl, wh, false, false, null); assertThat(tbl.getParameters(), is(expected)); // Verify that when stats are already present and forceRecompute is specified they are recomputed @@ -170,39 +169,8 @@ .addTableParam(TOTAL_SIZE, "0") .build(null); when(wh.getFileStatusesForUnpartitionedTable(db, tbl1)).thenReturn(fileStatus); - updateTableStatsSlow(db, tbl1, wh, false, true, null); + updateTableFsStatsSlow(db, tbl1, wh, false, true, null); assertThat(tbl1.getParameters(), is(expected)); - - // Verify that COLUMN_STATS_ACCURATE is removed from params - Table tbl2 = new TableBuilder() - .setDbName(DB_NAME) - .setTableName(TABLE_NAME) - .addCol("id", "int") - .addTableParam(COLUMN_STATS_ACCURATE, "true") - .build(null); - when(wh.getFileStatusesForUnpartitionedTable(db, tbl2)).thenReturn(fileStatus); - updateTableStatsSlow(db, tbl2, wh, false, true, null); - assertThat(tbl2.getParameters(), is(expected)); - - EnvironmentContext context = new EnvironmentContext(ImmutableMap.of(STATS_GENERATED, - StatsSetupConst.TASK)); - - // Verify that if environment context has STATS_GENERATED set to task, - // COLUMN_STATS_ACCURATE in params is set to correct value - Table tbl3 = new TableBuilder() - .setDbName(DB_NAME) - .setTableName(TABLE_NAME) - .addCol("id", "int") - .addTableParam(COLUMN_STATS_ACCURATE, "foo") // The value doesn't matter - .build(null); - when(wh.getFileStatusesForUnpartitionedTable(db, tbl3)).thenReturn(fileStatus); - updateTableStatsSlow(db, tbl3, wh, false, true, context); - - Map<String, String> expected1 = ImmutableMap.of(NUM_FILES, "2", - TOTAL_SIZE, String.valueOf(2 * fileLength), - NUM_ERASURE_CODED_FILES, "1", - COLUMN_STATS_ACCURATE, "{\"BASIC_STATS\":\"true\"}"); - assertThat(tbl3.getParameters(), is(expected1)); } /** @@ -224,10 +192,10 @@ public void testUpdateTableStatsSlow_removesDoNotUpdateStats() throws TException .addTableParam(StatsSetupConst.DO_NOT_UPDATE_STATS, "false") .build(null); Warehouse wh = mock(Warehouse.class); - updateTableStatsSlow(db, tbl, wh, false, true, null); + updateTableFsStatsSlow(db, tbl, wh, false, true, null); assertThat(tbl.getParameters(), is(Collections.emptyMap())); verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl); - updateTableStatsSlow(db, tbl1, wh, true, false, null); + updateTableFsStatsSlow(db, tbl1, wh, true, false, null); assertThat(tbl.getParameters(), is(Collections.emptyMap())); verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl1); } @@ -253,7 +221,7 @@ public void testUpdateTableStatsSlow_doesNotUpdateStats() throws TException { .build(null); Warehouse wh = mock(Warehouse.class); // newDir(true) => stats not updated - updateTableStatsSlow(db, tbl, wh, true, false, null); + updateTableFsStatsSlow(db, tbl, wh, true, false, null); verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl); // partitioned table => stats not updated @@ -263,7 +231,7 @@ .addCol("id", "int") .setPartCols(cols) .build(null); - updateTableStatsSlow(db, tbl1, wh, false, false, null); + updateTableFsStatsSlow(db, tbl1, wh, false, false, null); verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl1); // Already contains stats => stats not updated when forceRecompute isn't set @@ -273,7 +241,7 @@ .addCol("id", "int") .setTableParams(paramsWithStats) .build(null); - updateTableStatsSlow(db, tbl2, wh,
false, false, null); + updateTableFsStatsSlow(db, tbl2, wh, false, false, null); verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl2); }