diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 4fc0a93b61..17550791e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -1246,8 +1246,6 @@ private int alterMaterializedView(Hive db, AlterMaterializedViewDesc alterMVDesc // It can be fully qualified name or use default database Table oldMV = db.getTable(mvName); Table mv = oldMV.copy(); // Do not mess with Table instance - EnvironmentContext environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); switch (alterMVDesc.getOp()) { case UPDATE_REWRITE_FLAG: @@ -1262,7 +1260,7 @@ private int alterMaterializedView(Hive db, AlterMaterializedViewDesc alterMVDesc throw new AssertionError("Unsupported alter materialized view type! : " + alterMVDesc.getOp()); } - db.alterTable(mv, environmentContext); + db.alterTable(mv, null); return 0; } @@ -1436,7 +1434,6 @@ private int touch(Hive db, AlterTableSimpleDesc touchDesc) Table tbl = db.getTable(touchDesc.getTableName()); EnvironmentContext environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); if (touchDesc.getPartSpec() == null) { db.alterTable(tbl, environmentContext); @@ -3909,11 +3906,6 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition environmentContext = new EnvironmentContext(); alterTbl.setEnvironmentContext(environmentContext); } - // do not need update stats in alter table/partition operations - if (environmentContext.getProperties() == null || - environmentContext.getProperties().get(StatsSetupConst.DO_NOT_UPDATE_STATS) == null) { - environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); - } if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) { 
tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName())); @@ -4154,8 +4146,7 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition } catch (URISyntaxException e) { throw new HiveException(e); } - environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); - + Hive.collectFsStats(tbl, part, conf); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSKEWEDBY) { // Validation's been done at compile time. no validation is needed here. List skewedColNames = null; @@ -4202,7 +4193,7 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition } } - environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); + Hive.collectFsStats(tbl, part, conf); } else if (alterTbl.getOp() == AlterTableTypes.ALTERBUCKETNUM) { if (part != null) { if (part.getBucketCount() == alterTbl.getNumberBuckets()) { @@ -4224,12 +4215,6 @@ private static StorageDescriptor retrieveStorageDescriptor(Table tbl, Partition private List> alterTableDropProps(AlterTableDesc alterTbl, Table tbl, Partition part, EnvironmentContext environmentContext) throws HiveException { - if (StatsSetupConst.USER.equals(environmentContext.getProperties() - .get(StatsSetupConst.STATS_GENERATED))) { - // drop a stats parameter, which triggers recompute stats update automatically - environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); - } - List> result = null; if (part == null) { Set removedSet = alterTbl.getProps().keySet(); @@ -4365,10 +4350,6 @@ private static void ensureDelete(FileSystem fs, Path path, String what) throws I private List> alterTableAddProps(AlterTableDesc alterTbl, Table tbl, Partition part, EnvironmentContext environmentContext) throws HiveException { - if (StatsSetupConst.USER.equals(environmentContext.getProperties() - .get(StatsSetupConst.STATS_GENERATED))) { - environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS); - } List> result = null; if 
(part != null) { part.getTPartition().getParameters().putAll(alterTbl.getProps()); @@ -4757,6 +4738,11 @@ private int createTable(Hive db, CreateTableDesc crtTbl) throws HiveException { List defaultConstraints = crtTbl.getDefaultConstraints(); LOG.debug("creating table {} on {}",tbl.getFullyQualifiedName(),tbl.getDataLocation()); + if (tbl.getTableType() != TableType.MANAGED_TABLE) { + // should collect fs level stat info + Hive.collectFsStats(tbl, null, conf); + } + if (crtTbl.getReplicationSpec().isInReplicationScope() && (!crtTbl.getReplaceMode())){ // if this is a replication spec, then replace-mode semantics might apply. // if we're already asking for a table replacement, then we can skip this check. @@ -4942,6 +4928,8 @@ private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exceptio makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tbl.getTableName(), conf); } + // FIXME: this is missing; and it's definitely an improvement; but separating it from the current changeset + // Hive.collectFsStats(tbl, null, conf); if (crtTbl.getLocation() == null && !tbl.isPartitioned() && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters(), diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index c0be51e0b2..6e54858483 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -162,6 +162,8 @@ import org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType; import org.apache.hadoop.hive.ql.session.CreateTableAutomaticGrant; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.stats.FSStatsUtils; +import org.apache.hadoop.hive.ql.stats.Partish; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; @@ -173,7 +175,6 @@ import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import com.google.common.base.Splitter; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; @@ -181,6 +182,7 @@ import com.google.common.collect.Sets; import com.google.common.util.concurrent.ThreadFactoryBuilder; + /** * This class has functions that implement meta data/DDL operations using calls * to the metastore. @@ -1689,7 +1691,9 @@ else if(!isAcidIUDoperation && isFullAcidTable) { StatsSetupConst.setStatsStateForCreateTable(newTPart.getParameters(), MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE); } - MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), newTPart.getParameters()); + FSStatsUtils.populateQuickStats( + HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), + newTPart.getParameters()); try { LOG.debug("Adding new partition " + newTPart.getSpec()); getSynchronizedMSC().add_partition(newTPart.getTPartition()); @@ -1705,7 +1709,7 @@ else if(!isAcidIUDoperation && isFullAcidTable) { // insert into table T partition (ds) values ('Joe', 'today'); -- will fail with AlreadyExistsException // In that case, we want to retry with alterPartition. 
LOG.debug("Caught AlreadyExistsException, trying to alter partition instead"); - setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart); + setStatsPropAndAlterPartition(tbl, newTPart); } catch (Exception e) { try { final FileSystem newPathFileSystem = newPartPath.getFileSystem(this.getConf()); @@ -1718,7 +1722,7 @@ else if(!isAcidIUDoperation && isFullAcidTable) { throw e; } } else { - setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart); + setStatsPropAndAlterPartition(tbl, newTPart); } return newTPart; } catch (IOException e) { @@ -1815,16 +1819,9 @@ public boolean accept(Path path) { return newFiles; } - private void setStatsPropAndAlterPartition(boolean hasFollowingStatsTask, Table tbl, - Partition newTPart) throws MetaException, TException { - EnvironmentContext environmentContext = null; - if (hasFollowingStatsTask) { - environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); - } + private void setStatsPropAndAlterPartition(Table tbl, Partition newTPart) throws MetaException, TException { LOG.debug("Altering existing partition " + newTPart.getSpec()); - getSynchronizedMSC().alter_partition(tbl.getDbName(), tbl.getTableName(), - newTPart.getTPartition(), environmentContext); + getSynchronizedMSC().alter_partition(tbl.getDbName(), tbl.getTableName(), newTPart.getTPartition(), null); } /** @@ -2230,9 +2227,8 @@ else if(!isAcidIUDoperation && isFullAcidTable) { } EnvironmentContext environmentContext = null; - if (hasFollowingStatsTask) { - environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); + if (!hasFollowingStatsTask) { + collectFsStats(tbl, null, conf); } alterTable(tbl, environmentContext); @@ -4879,4 +4875,30 @@ public void createOrDropTriggerToPoolMapping(String resourcePlanName, String tri throw new HiveException(e); } } + + + /** + * Collects only 
filesystem level stats. + * + * For complete stat collection StatsTask should run. + */ + public static void collectFsStats(Table tbl, Partition part, HiveConf conf) throws HiveException { + Partish p; + if (tbl.isPartitioned() && part != null) { + p = Partish.buildFor(tbl, part); + } else { + p = Partish.buildFor(tbl); + } + Partish partish = p; + + try { + // FIXME: move this wh creation somewhere else? + Warehouse wh = new Warehouse(conf); + StatsSetupConst.setBasicStatsState(p.getPartParameters(), StatsSetupConst.FALSE); + FileStatus[] partfileStatus = FSStatsUtils.getFileStatusesForSD(conf, partish.getPartSd()); + FSStatsUtils.populateQuickStats(partfileStatus, p.getPartParameters()); + } catch (MetaException e) { + throw new HiveException(e); + } + } }; diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index d79b6ed059..e9fe2c112e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -562,7 +562,6 @@ private boolean needToUpdateStats(Map props, EnvironmentContext e } //first set basic stats to true StatsSetupConst.setBasicStatsState(props, StatsSetupConst.TRUE); - environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK); //then invalidate column stats StatsSetupConst.clearColumnStatsState(props); return statsPresent; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 5761795b1c..93461cd5e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1719,11 +1719,6 @@ else if(entry.getKey().equals("external") && entry.getValue().equals("true")){ + StatsSetupConst.RAW_DATA_SIZE); } } - - if 
(changeStatsSucceeded) { - environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.USER); - } } AlterTableDesc alterTblDesc = null; if (isUnset == true) { @@ -1742,6 +1737,12 @@ else if(entry.getKey().equals("external") && entry.getValue().equals("true")){ || mapProp.containsKey(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES); addInputsOutputsAlterTable(tableName, partSpec, alterTblDesc, isPotentialMmSwitch); + if (changeStatsSucceeded) { + // mark stats as unusable by setting it empty; + // because of alter call: can't use setBasicStatsState() + mapProp.put(StatsSetupConst.COLUMN_STATS_ACCURATE, "{}"); + } + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); } diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java index 946c300750..72d1ec019a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java @@ -32,7 +32,6 @@ import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.StatsTask; @@ -311,9 +310,6 @@ private int updatePartitions(Hive db, List scs, Table table } } - EnvironmentContext environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); - ImmutableListMultimap collectorsByTable = Multimaps.index(validColectors, FooterStatCollector.SIMPLE_NAME_FUNCTION); LOG.debug("Collectors.size(): {}", collectorsByTable.keySet()); @@ -335,12 +331,12 @@ private 
int updatePartitions(Hive db, List scs, Table table } if (values.get(0).result instanceof Table) { - db.alterTable(tableFullName, (Table) values.get(0).result, environmentContext); + db.alterTable(tableFullName, (Table) values.get(0).result, null); LOG.debug("Updated stats for {}.", tableFullName); } else { if (values.get(0).result instanceof Partition) { List results = Lists.transform(values, FooterStatCollector.EXTRACT_RESULT_FUNCTION); - db.alterPartitions(tableFullName, results, environmentContext); + db.alterPartitions(tableFullName, results, null); LOG.debug("Bulk updated {} partitions of {}.", results.size(), tableFullName); } else { throw new RuntimeException("inconsistent"); diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java index 1d7660e8b2..605476ae01 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java @@ -35,7 +35,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -164,12 +163,12 @@ public Object process(StatsAggregator statsAggregator) throws HiveException, Met return p.getOutput(); } - public void collectFileStatus(Warehouse wh) throws MetaException { - partfileStatus = wh.getFileStatusesForSD(partish.getPartSd()); + public void collectFileStatus(HiveConf conf) throws MetaException { + partfileStatus = FSStatsUtils.getFileStatusesForSD(conf, partish.getPartSd()); } private void updateQuickStats(Map parameters, FileStatus[] partfileStatus) throws MetaException { - MetaStoreUtils.populateQuickStats(partfileStatus, parameters); + 
FSStatsUtils.populateQuickStats(partfileStatus, parameters); } private String getAggregationPrefix(Table table, Partition partition) throws MetaException { @@ -217,9 +216,6 @@ private int aggregateStats(Hive db) { StatsAggregator statsAggregator = null; int ret = 0; StatsCollectionContext scc = null; - EnvironmentContext environmentContext = null; - environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); try { // Stats setup: @@ -247,12 +243,12 @@ private int aggregateStats(Hive db) { partishes.add(p = new Partish.PTable(table)); BasicStatsProcessor basicStatsProcessor = new BasicStatsProcessor(p, work, conf, followedColStats); - basicStatsProcessor.collectFileStatus(wh); + basicStatsProcessor.collectFileStatus(conf); Table res = (Table) basicStatsProcessor.process(statsAggregator); if (res == null) { return 0; } - db.alterTable(tableFullName, res, environmentContext); + db.alterTable(tableFullName, res, null); if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) { console.printInfo("Table " + tableFullName + " stats: [" + toString(p.getPartParameters()) + ']'); @@ -280,7 +276,7 @@ private int aggregateStats(Hive db) { futures.add(pool.submit(new Callable() { @Override public Void call() throws Exception { - bsp.collectFileStatus(wh); + bsp.collectFileStatus(conf); return null; } })); @@ -320,7 +316,7 @@ public Void call() throws Exception { } if (!updates.isEmpty()) { - db.alterPartitions(tableFullName, updates, environmentContext); + db.alterPartitions(tableFullName, updates, null); } if (work.isStatsReliable() && updates.size() != processors.size()) { LOG.info("Stats should be reliadble...however seems like there were some issue.. 
=> ret 1"); diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/FSStatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/FSStatsUtils.java new file mode 100644 index 0000000000..4dd948e55c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/stats/FSStatsUtils.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.stats; + +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.utils.FileUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; + +public class FSStatsUtils { + + /** + * Collects the file statuses. 
+ * + * @return array of FileStatus objects corresponding to the files + * making up the passed storage description + */ + public static FileStatus[] getFileStatusesForSD(Configuration conf, StorageDescriptor desc) throws MetaException { + try { + Path path = new Path(desc.getLocation()); + FileSystem fileSys = path.getFileSystem(conf); + return FileUtils.getFileStatusRecurse(path, -1, fileSys); + } catch (IOException ioe) { + MetaStoreUtils.logAndThrowMetaException(ioe); + } + return null; + } + + public static void populateQuickStats(FileStatus[] fileStatus, Map params) { + int numFiles = 0; + long tableSize = 0L; + for (FileStatus status : fileStatus) { + // don't take directories into account for quick stats + if (!status.isDir()) { + tableSize += status.getLen(); + numFiles += 1; + } + } + params.put(StatsSetupConst.NUM_FILES, Integer.toString(numFiles)); + params.put(StatsSetupConst.TOTAL_SIZE, Long.toString(tableSize)); + } + +} diff --git ql/src/test/queries/clientpositive/explainanalyze_5.q ql/src/test/queries/clientpositive/explainanalyze_5.q index 696064c874..df5df9aadb 100644 --- ql/src/test/queries/clientpositive/explainanalyze_5.q +++ ql/src/test/queries/clientpositive/explainanalyze_5.q @@ -76,6 +76,7 @@ create table acid_dot( select count(*) from acid_dot; +desc formatted acid_dot; explain analyze delete from acid_dot where cint < -1070551679; select count(*) from acid_dot; diff --git ql/src/test/results/clientpositive/alter_table_stats_status.q.out ql/src/test/results/clientpositive/alter_table_stats_status.q.out index 7ca949d437..47aad8771f 100644 --- ql/src/test/results/clientpositive/alter_table_stats_status.q.out +++ ql/src/test/results/clientpositive/alter_table_stats_status.q.out @@ -172,6 +172,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {} #### A masked pattern was here #### numFiles 1 numRows 1000 @@ -480,6 +481,7 @@ Database: statsdb Table: srcpart #### A masked 
pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE {} #### A masked pattern was here #### numFiles 1 numRows 1000 diff --git ql/src/test/results/clientpositive/autoColumnStats_4.q.out ql/src/test/results/clientpositive/autoColumnStats_4.q.out index 1f4c0adfc7..9b7b12e96f 100644 --- ql/src/test/results/clientpositive/autoColumnStats_4.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_4.q.out @@ -237,7 +237,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {} numFiles 4 numRows 0 rawDataSize 0 diff --git ql/src/test/results/clientpositive/create_table_like_stats.q.out ql/src/test/results/clientpositive/create_table_like_stats.q.out index f88c50cfeb..45cc800436 100644 --- ql/src/test/results/clientpositive/create_table_like_stats.q.out +++ ql/src/test/results/clientpositive/create_table_like_stats.q.out @@ -79,8 +79,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - numFiles 0 - totalSize 0 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/default_file_format.q.out ql/src/test/results/clientpositive/default_file_format.q.out index 9eb7d2d4b8..81dce2d667 100644 --- ql/src/test/results/clientpositive/default_file_format.q.out +++ ql/src/test/results/clientpositive/default_file_format.q.out @@ -230,8 +230,6 @@ Retention: 0 Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE - numFiles 0 - totalSize 0 #### A masked pattern was here #### # Storage Information @@ -462,8 +460,6 @@ Retention: 0 Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE - numFiles 0 - totalSize 0 #### A masked pattern was here #### # Storage Information @@ -526,8 +522,6 @@ Retention: 0 Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE - numFiles 0 - totalSize 0 #### A masked pattern was here #### # Storage Information diff --git 
ql/src/test/results/clientpositive/describe_syntax.q.out ql/src/test/results/clientpositive/describe_syntax.q.out index a0c6182c3f..1f582361ae 100644 --- ql/src/test/results/clientpositive/describe_syntax.q.out +++ ql/src/test/results/clientpositive/describe_syntax.q.out @@ -215,7 +215,6 @@ num_trues num_falses bitVector comment from deserializer -COLUMN_STATS_ACCURATE {} PREHOOK: query: DESCRIBE db1.t1 key1 PREHOOK: type: DESCTABLE PREHOOK: Input: db1@t1 @@ -248,7 +247,6 @@ num_trues num_falses bitVector comment from deserializer -COLUMN_STATS_ACCURATE {} PREHOOK: query: DESCRIBE t1 key1 PREHOOK: type: DESCTABLE PREHOOK: Input: db1@t1 @@ -281,7 +279,6 @@ num_trues num_falses bitVector comment from deserializer -COLUMN_STATS_ACCURATE {} PREHOOK: query: DESCRIBE t1 PARTITION(ds='4', part='5') PREHOOK: type: DESCTABLE PREHOOK: Input: db1@t1 diff --git ql/src/test/results/clientpositive/partition_coltype_literals.q.out ql/src/test/results/clientpositive/partition_coltype_literals.q.out index adabbb2e10..f505c1e6bb 100644 --- ql/src/test/results/clientpositive/partition_coltype_literals.q.out +++ ql/src/test/results/clientpositive/partition_coltype_literals.q.out @@ -609,12 +609,11 @@ Database: default Table: partcoltypenum #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} #### A masked pattern was here #### - numFiles 1 + numFiles 0 numRows 10 rawDataSize 104 - totalSize 114 + totalSize 0 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out index 7f18f2b42b..7dce8ce496 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out @@ -382,6 +382,49 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_dot POSTHOOK: Output: hdfs://### HDFS PATH ### 12288 +PREHOOK: query: desc formatted acid_dot 
+PREHOOK: type: DESCTABLE +PREHOOK: Input: default@acid_dot +POSTHOOK: query: desc formatted acid_dot +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@acid_dot +# col_name data_type comment +ctinyint tinyint +csmallint smallint +cint int +cbigint bigint +cfloat float +cdouble double +cstring1 string +cstring2 string +ctimestamp1 timestamp +ctimestamp2 timestamp +cboolean1 boolean +cboolean2 boolean + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + totalSize 377237 + transactional true + transactional_properties default +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: 1 +Bucket Columns: [cint] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: delete from acid_dot where cint < -1070551679 PREHOOK: type: QUERY PREHOOK: Input: default@acid_dot diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 59190893e6..d6f39afcbb 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -126,19 +126,6 @@ public String getAggregator(Configuration conf) { */ public static final String[] fastStats = new String[] {NUM_FILES,TOTAL_SIZE}; - // This string constant is used to indicate to AlterHandler that - // alterPartition/alterTable is happening via statsTask or via user. 
- public static final String STATS_GENERATED = "STATS_GENERATED"; - - public static final String TASK = "TASK"; - - public static final String USER = "USER"; - - // This string constant is used by AlterHandler to figure out that it should not attempt to - // update stats. It is set by any client-side task which wishes to signal that no stats - // update should take place, such as with replication. - public static final String DO_NOT_UPDATE_STATS = "DO_NOT_UPDATE_STATS"; - //This string constant will be persisted in metastore to indicate whether corresponding //table or partition's statistics and table or partition's column statistics are accurate or not. public static final String COLUMN_STATS_ACCURATE = "COLUMN_STATS_ACCURATE"; @@ -178,6 +165,7 @@ public void serialize(Boolean value, JsonGenerator jsonGenerator, static class BooleanDeserializer extends JsonDeserializer { + @Override public Boolean deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException { @@ -265,7 +253,7 @@ public static boolean canColumnStatsMerge(Map params, String col ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); return stats.columnStats.containsKey(colName); } - + public static void clearColumnStatsState(Map params) { if (params == null) { return; @@ -274,7 +262,11 @@ public static void clearColumnStatsState(Map params) { stats.columnStats.clear(); try { - params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + if (!stats.basicStats && stats.columnStats.isEmpty()) { + params.remove(COLUMN_STATS_ACCURATE); + } else { + params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); + } } catch (JsonProcessingException e) { LOG.trace(e.getMessage()); } @@ -289,7 +281,11 @@ public static void removeColumnStatsState(Map params, List params, setColumnStatsState(params, cols); } } - + private static ColumnStatsAccurate parseStatsAcc(String statsAcc) { 
if (statsAcc == null) { return new ColumnStatsAccurate(); diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index 89354a2d34..aa265a8aa1 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -292,13 +292,6 @@ public void alterTable(RawStore msdb, Warehouse wh, String dbname, alterTableUpdateTableColumnStats(msdb, oldt, newt); } } else { - // operations other than table rename - if (MetaStoreUtils.requireCalStats(null, null, newt, environmentContext) && - !isPartitionedTable) { - Database db = msdb.getDatabase(newDbName); - // Update table stats. For partitioned table, we update stats in alterPartition() - MetaStoreUtils.updateTableStatsFast(db, newt, wh, false, true, environmentContext); - } if (isPartitionedTable) { //Currently only column related changes can be cascaded in alter table @@ -447,14 +440,6 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String try { msdb.openTransaction(); oldPart = msdb.getPartition(dbname, name, new_part.getValues()); - if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) { - // if stats are same, no need to update - if (MetaStoreUtils.isFastStatsSame(oldPart, new_part)) { - MetaStoreUtils.updateBasicState(environmentContext, new_part.getParameters()); - } else { - MetaStoreUtils.updatePartitionStatsFast(new_part, wh, false, true, environmentContext); - } - } // PartitionView does not have SD. 
We do not need update its column stats if (oldPart.getSd() != null) { @@ -580,10 +565,6 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String new_part.getSd().setLocation(oldPart.getSd().getLocation()); } - if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) { - MetaStoreUtils.updatePartitionStatsFast(new_part, wh, false, true, environmentContext); - } - String newPartName = Warehouse.makePartName(tbl.getPartitionKeys(), new_part.getValues()); ColumnStatistics cs = updateOrGetPartitionColumnStats(msdb, dbname, name, oldPart.getValues(), oldPart.getSd().getCols(), tbl, new_part, null); @@ -672,15 +653,6 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String oldParts.add(oldTmpPart); partValsList.add(tmpPart.getValues()); - if (MetaStoreUtils.requireCalStats(oldTmpPart, tmpPart, tbl, environmentContext)) { - // Check if stats are same, no need to update - if (MetaStoreUtils.isFastStatsSame(oldTmpPart, tmpPart)) { - MetaStoreUtils.updateBasicState(environmentContext, tmpPart.getParameters()); - } else { - MetaStoreUtils.updatePartitionStatsFast(tmpPart, wh, false, true, environmentContext); - } - } - // PartitionView does not have SD and we do not need to update its column stats if (oldTmpPart.getSd() != null) { updateOrGetPartitionColumnStats(msdb, dbname, name, oldTmpPart.getValues(), diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 662de9a667..8cc5a686f3 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1486,10 +1486,6 @@ private void create_table_core(final RawStore ms, final Table tbl, madeDir = true; } } - if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - 
!MetaStoreUtils.isView(tbl)) { - MetaStoreUtils.updateTableStatsFast(db, tbl, wh, madeDir, envContext); - } // set create time long time = System.currentTimeMillis() / 1000; @@ -2235,7 +2231,6 @@ private void updateStatsForTruncate(Map props, EnvironmentContext } //first set basic stats to true StatsSetupConst.setBasicStatsState(props, StatsSetupConst.TRUE); - environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK); //then invalidate column stats StatsSetupConst.clearColumnStatsState(props); return; @@ -2671,11 +2666,6 @@ private Partition append_partition_common(RawStore ms, String dbName, String tab part.setCreateTime((int) time); part.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(time)); - if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreUtils.isView(tbl)) { - MetaStoreUtils.updatePartitionStatsFast(part, wh, madeDir, envContext); - } - if (ms.addPartition(part)) { if (!transactionalListeners.isEmpty()) { transactionalListenerResponses = @@ -3239,10 +3229,6 @@ private void initializeAddedPartition( private void initializeAddedPartition( final Table tbl, final PartitionSpecProxy.PartitionIterator part, boolean madeDir) throws MetaException { - if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreUtils.isView(tbl)) { - MetaStoreUtils.updatePartitionStatsFast(part, wh, madeDir, false, null); - } // set create time long time = System.currentTimeMillis() / 1000; @@ -6790,8 +6776,7 @@ public GetFileMetadataByExprResult get_file_metadata_by_expr(GetFileMetadataByEx getMS().getFileMetadataByExpr(fileIds, type, req.getExpr(), metadatas, ppdResults, eliminated); for (int i = 0; i < fileIds.size(); ++i) { - if (!eliminated[i] && ppdResults[i] == null) - { + if (!eliminated[i] && ppdResults[i] == null) { continue; // No metadata => no ppd. 
} MetadataPpdResult mpr = new MetadataPpdResult(); diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java index 20c10607bb..4b724e1e76 100755 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java @@ -50,7 +50,6 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.util.ReflectionUtils; @@ -419,7 +418,9 @@ public static boolean makeSpecFromName(Map partSpec, Path currPa } partSpec.put(key, kvs.get(i - 1)[1]); } - if (requiredKeys == null || requiredKeys.isEmpty()) return true; + if (requiredKeys == null || requiredKeys.isEmpty()) { + return true; + } LOG.warn("Cannot create partition spec from " + currPath + "; missing keys " + requiredKeys); return false; } @@ -539,51 +540,6 @@ public static String makePartName(List partCols, } /** - * @param desc - * @return array of FileStatus objects corresponding to the files - * making up the passed storage description - */ - public FileStatus[] getFileStatusesForSD(StorageDescriptor desc) - throws MetaException { - return getFileStatusesForLocation(desc.getLocation()); - } - - /** - * @param location - * @return array of FileStatus objects corresponding to the files - * making up the passed storage description - */ - public FileStatus[] getFileStatusesForLocation(String location) - throws MetaException { - try { - Path path = new Path(location); - FileSystem fileSys = path.getFileSystem(conf); - return FileUtils.getFileStatusRecurse(path, -1, fileSys); - } catch (IOException ioe) { - MetaStoreUtils.logAndThrowMetaException(ioe); - } 
- return null; - } - - /** - * @param db database - * @param table table - * @return array of FileStatus objects corresponding to the files making up the passed - * unpartitioned table - */ - public FileStatus[] getFileStatusesForUnpartitionedTable(Database db, Table table) - throws MetaException { - Path tablePath = getDnsPath(new Path(table.getSd().getLocation())); - try { - FileSystem fileSys = tablePath.getFileSystem(conf); - return FileUtils.getFileStatusRecurse(tablePath, -1, fileSys); - } catch (IOException ioe) { - MetaStoreUtils.logAndThrowMetaException(ioe); - } - return null; - } - - /** * Makes a valid partition name. * @param partCols The partition columns * @param vals The partition values diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 9f822564bd..712201570a 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -695,8 +695,6 @@ public static ConfVars getMetaConf(String name) { "Metastore SSL certificate truststore location."), SSL_TRUSTSTORE_PASSWORD("metastore.truststore.password", "hive.metastore.truststore.password", "", "Metastore SSL certificate truststore password."), - STATS_AUTO_GATHER("metastore.stats.autogather", "hive.stats.autogather", true, - "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."), STATS_FETCH_BITVECTOR("metastore.stats.fetch.bitvector", "hive.stats.fetch.bitvector", false, "Whether we fetch bitvector when we compute ndv. 
Users can turn it off if they want to use old schema"), STATS_NDV_TUNER("metastore.stats.ndv.tuner", "hive.metastore.stats.ndv.tuner", 0.0, @@ -1113,16 +1111,22 @@ public static Configuration newMetastoreConf() { */ hiveSiteURL = findConfigFile(classLoader, "hive-site.xml"); } - if (hiveSiteURL != null) conf.addResource(hiveSiteURL); + if (hiveSiteURL != null) { + conf.addResource(hiveSiteURL); + } // Now add hivemetastore-site.xml. Again we add this before our own config files so that the // newer overrides the older. hiveMetastoreSiteURL = findConfigFile(classLoader, "hivemetastore-site.xml"); - if (hiveMetastoreSiteURL != null) conf.addResource(hiveMetastoreSiteURL); + if (hiveMetastoreSiteURL != null) { + conf.addResource(hiveMetastoreSiteURL); + } // Add in our conf file metastoreSiteURL = findConfigFile(classLoader, "metastore-site.xml"); - if (metastoreSiteURL != null) conf.addResource(metastoreSiteURL); + if (metastoreSiteURL != null) { + conf.addResource(metastoreSiteURL); + } // If a system property that matches one of our conf value names is set then use the value // it's set to to set our own conf value. 
@@ -1245,8 +1249,12 @@ public static String getVar(Configuration conf, ConfVars var, String defaultVal) public static Collection getStringCollection(Configuration conf, ConfVars var) { assert var.defaultVal.getClass() == String.class; String val = conf.get(var.varname); - if (val == null) val = conf.get(var.hiveName, (String)var.defaultVal); - if (val == null) return Collections.emptySet(); + if (val == null) { + val = conf.get(var.hiveName, (String)var.defaultVal); + } + if (val == null) { + return Collections.emptySet(); + } return StringUtils.asSet(val.split(",")); } diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index b051961442..4a92b0486b 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -25,7 +25,6 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.commons.collections.ListUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -34,12 +33,9 @@ import org.apache.hadoop.hive.metastore.ColumnType; import org.apache.hadoop.hive.metastore.HiveMetaStore; import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.Decimal; -import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.FieldSchema; import 
org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -55,7 +51,6 @@ import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger; import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; import org.apache.hadoop.security.SaslRpcServer; import org.apache.hadoop.security.authorize.DefaultImpersonationProvider; @@ -527,46 +522,6 @@ public static boolean isExternalTable(Table table) { return "TRUE".equalsIgnoreCase(params.get("EXTERNAL")); } - // check if stats need to be (re)calculated - public static boolean requireCalStats(Partition oldPart, - Partition newPart, Table tbl, - EnvironmentContext environmentContext) { - - if (environmentContext != null - && environmentContext.isSetProperties() - && StatsSetupConst.TRUE.equals(environmentContext.getProperties().get( - StatsSetupConst.DO_NOT_UPDATE_STATS))) { - return false; - } - - if (isView(tbl)) { - return false; - } - - if (oldPart == null && newPart == null) { - return true; - } - - // requires to calculate stats if new partition doesn't have it - if ((newPart == null) || (newPart.getParameters() == null) - || !containsAllFastStats(newPart.getParameters())) { - return true; - } - - if (environmentContext != null && environmentContext.isSetProperties()) { - String statsType = environmentContext.getProperties().get(StatsSetupConst.STATS_GENERATED); - // no matter STATS_GENERATED is USER or TASK, all need to re-calculate the stats: - // USER: alter table .. 
update statistics - // TASK: from some sql operation which could collect and compute stats - if (StatsSetupConst.TASK.equals(statsType) || StatsSetupConst.USER.equals(statsType)) { - return true; - } - } - - // requires to calculate stats if new and old have different fast stats - return !isFastStatsSame(oldPart, newPart); - } - public static boolean isView(Table table) { if (table == null) { return false; @@ -574,199 +529,10 @@ public static boolean isView(Table table) { return TableType.VIRTUAL_VIEW.toString().equals(table.getTableType()); } - /** - * @param partParams - * @return True if the passed Parameters Map contains values for all "Fast Stats". - */ - private static boolean containsAllFastStats(Map partParams) { - for (String stat : StatsSetupConst.fastStats) { - if (!partParams.containsKey(stat)) { - return false; - } - } - return true; - } - - public static boolean isFastStatsSame(Partition oldPart, Partition newPart) { - // requires to calculate stats if new and old have different fast stats - if ((oldPart != null) && (oldPart.getParameters() != null)) { - for (String stat : StatsSetupConst.fastStats) { - if (oldPart.getParameters().containsKey(stat)) { - Long oldStat = Long.parseLong(oldPart.getParameters().get(stat)); - Long newStat = Long.parseLong(newPart.getParameters().get(stat)); - if (!oldStat.equals(newStat)) { - return false; - } - } else { - return false; - } - } - return true; - } - return false; - } - - public static boolean updateTableStatsFast(Database db, Table tbl, Warehouse wh, - boolean madeDir, EnvironmentContext environmentContext) throws MetaException { - return updateTableStatsFast(db, tbl, wh, madeDir, false, environmentContext); - } - - public static boolean updateTableStatsFast(Database db, Table tbl, Warehouse wh, - boolean madeDir, boolean forceRecompute, EnvironmentContext environmentContext) throws MetaException { - if (tbl.getPartitionKeysSize() == 0) { - // Update stats only when unpartitioned - FileStatus[] fileStatuses 
= wh.getFileStatusesForUnpartitionedTable(db, tbl); - return updateTableStatsFast(tbl, fileStatuses, madeDir, forceRecompute, environmentContext); - } else { - return false; - } - } - - /** - * Updates the numFiles and totalSize parameters for the passed Table by querying - * the warehouse if the passed Table does not already have values for these parameters. - * @param tbl - * @param fileStatus - * @param newDir if true, the directory was just created and can be assumed to be empty - * @param forceRecompute Recompute stats even if the passed Table already has - * these parameters set - * @return true if the stats were updated, false otherwise - */ - public static boolean updateTableStatsFast(Table tbl, FileStatus[] fileStatus, boolean newDir, - boolean forceRecompute, EnvironmentContext environmentContext) throws MetaException { - - Map params = tbl.getParameters(); - - if ((params!=null) && params.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)){ - boolean doNotUpdateStats = Boolean.valueOf(params.get(StatsSetupConst.DO_NOT_UPDATE_STATS)); - params.remove(StatsSetupConst.DO_NOT_UPDATE_STATS); - tbl.setParameters(params); // to make sure we remove this marker property - if (doNotUpdateStats){ - return false; - } - } - - boolean updated = false; - if (forceRecompute || - params == null || - !containsAllFastStats(params)) { - if (params == null) { - params = new HashMap<>(); - } - if (!newDir) { - // The table location already exists and may contain data. - // Let's try to populate those stats that don't require full scan. 
- LOG.info("Updating table stats fast for " + tbl.getTableName()); - populateQuickStats(fileStatus, params); - LOG.info("Updated size of table " + tbl.getTableName() +" to "+ params.get(StatsSetupConst.TOTAL_SIZE)); - if (environmentContext != null - && environmentContext.isSetProperties() - && StatsSetupConst.TASK.equals(environmentContext.getProperties().get( - StatsSetupConst.STATS_GENERATED))) { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); - } else { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); - } - } - tbl.setParameters(params); - updated = true; - } - return updated; - } - - public static void populateQuickStats(FileStatus[] fileStatus, Map params) { - int numFiles = 0; - long tableSize = 0L; - for (FileStatus status : fileStatus) { - // don't take directories into account for quick stats - if (!status.isDir()) { - tableSize += status.getLen(); - numFiles += 1; - } - } - params.put(StatsSetupConst.NUM_FILES, Integer.toString(numFiles)); - params.put(StatsSetupConst.TOTAL_SIZE, Long.toString(tableSize)); - } - public static boolean areSameColumns(List oldCols, List newCols) { return ListUtils.isEqualList(oldCols, newCols); } - public static void updateBasicState(EnvironmentContext environmentContext, Map - params) { - if (params == null) { - return; - } - if (environmentContext != null - && environmentContext.isSetProperties() - && StatsSetupConst.TASK.equals(environmentContext.getProperties().get( - StatsSetupConst.STATS_GENERATED))) { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); - } else { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); - } - } - - public static boolean updatePartitionStatsFast(Partition part, Warehouse wh, EnvironmentContext environmentContext) - throws MetaException { - return updatePartitionStatsFast(part, wh, false, false, environmentContext); - } - - public static boolean updatePartitionStatsFast(Partition part, Warehouse wh, boolean madeDir, 
EnvironmentContext environmentContext) - throws MetaException { - return updatePartitionStatsFast(part, wh, madeDir, false, environmentContext); - } - - /** - * Updates the numFiles and totalSize parameters for the passed Partition by querying - * the warehouse if the passed Partition does not already have values for these parameters. - * @param part - * @param wh - * @param madeDir if true, the directory was just created and can be assumed to be empty - * @param forceRecompute Recompute stats even if the passed Partition already has - * these parameters set - * @return true if the stats were updated, false otherwise - */ - public static boolean updatePartitionStatsFast(Partition part, Warehouse wh, - boolean madeDir, boolean forceRecompute, EnvironmentContext environmentContext) throws MetaException { - return updatePartitionStatsFast(new PartitionSpecProxy.SimplePartitionWrapperIterator(part), - wh, madeDir, forceRecompute, environmentContext); - } - /** - * Updates the numFiles and totalSize parameters for the passed Partition by querying - * the warehouse if the passed Partition does not already have values for these parameters. - * @param part - * @param wh - * @param madeDir if true, the directory was just created and can be assumed to be empty - * @param forceRecompute Recompute stats even if the passed Partition already has - * these parameters set - * @return true if the stats were updated, false otherwise - */ - public static boolean updatePartitionStatsFast(PartitionSpecProxy.PartitionIterator part, Warehouse wh, - boolean madeDir, boolean forceRecompute, EnvironmentContext environmentContext) throws MetaException { - Map params = part.getParameters(); - boolean updated = false; - if (forceRecompute || - params == null || - !containsAllFastStats(params)) { - if (params == null) { - params = new HashMap<>(); - } - if (!madeDir) { - // The partition location already existed and may contain data. 
Lets try to - // populate those statistics that don't require a full scan of the data. - LOG.warn("Updating partition stats fast for: " + part.getTableName()); - FileStatus[] fileStatus = wh.getFileStatusesForLocation(part.getLocation()); - populateQuickStats(fileStatus, params); - LOG.warn("Updated size to " + params.get(StatsSetupConst.TOTAL_SIZE)); - updateBasicState(environmentContext, params); - } - part.setParameters(params); - updated = true; - } - return updated; - } - /* * This method is to check if the new column list includes all the old columns with same name and * type. The column comment does not count. diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java index 2599ab103e..406766d8d8 100644 --- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java @@ -81,8 +81,6 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.Type; import org.apache.hadoop.hive.metastore.api.UnknownDBException; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.util.StringUtils; import org.apache.thrift.TException; import org.junit.Test; @@ -489,7 +487,6 @@ private static Partition makePartitionObject(String dbName, String tblName, part4.setSd(tbl.getSd().deepCopy()); part4.getSd().setSerdeInfo(tbl.getSd().getSerdeInfo().deepCopy()); part4.getSd().setLocation(tbl.getSd().getLocation() + ptnLocationSuffix); - MetaStoreUtils.updatePartitionStatsFast(part4, warehouse, null); return part4; }