commit daaf3f185c0e44901399ed457211a65eecf9d0bb Author: Alice Fan Date: Mon Aug 13 11:58:18 2018 -0700 HIVE-20246 : Configurable collecting stats by using DO_NOT_UPDATE_STATS table property diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 324035a809..15492e383c 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -3174,8 +3174,7 @@ private Partition append_partition_common(RawStore ms, String catName, String db part.setCreateTime((int) time); part.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(time)); - if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreServerUtils.isView(tbl)) { + if (canUpdateStats(tbl)) { MetaStoreServerUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, envContext, true); } @@ -3791,6 +3790,27 @@ private boolean createLocationForAddedPartition( return result; } + /** + * Verify if update stats while altering partition(s) + * For the following three cases HMS will not update partition stats + * 1) Table property 'DO_NOT_UPDATE_STATS' = True + * 2) HMS configuration property 'STATS_AUTO_GATHER' = False + * 3) Is View + */ + private boolean canUpdateStats(Table tbl) { + Map tblParams = tbl.getParameters(); + boolean updateStatsTbl = true; + if ((tblParams != null) && tblParams.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)) { + updateStatsTbl = !Boolean.valueOf(tblParams.get(StatsSetupConst.DO_NOT_UPDATE_STATS)); + } + if (!MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) || + MetaStoreServerUtils.isView(tbl) || + !updateStatsTbl) { + return false; + } + return true; + } + private void initializeAddedPartition( final Table tbl, 
final Partition part, boolean madeDir) throws MetaException { initializeAddedPartition(tbl, new PartitionSpecProxy.SimplePartitionWrapperIterator(part), madeDir); @@ -3798,8 +3818,7 @@ private void initializeAddedPartition( private void initializeAddedPartition( final Table tbl, final PartitionSpecProxy.PartitionIterator part, boolean madeDir) throws MetaException { - if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreServerUtils.isView(tbl)) { + if (canUpdateStats(tbl)) { MetaStoreServerUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, null, true); } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java index 60beab6350..4937d9d861 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.metastore; -import java.lang.reflect.Field; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; @@ -38,6 +37,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.lang.reflect.*; +import static org.mockito.Mockito.mock; import com.google.common.collect.Sets; import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder; @@ -57,6 +58,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; @@ -95,6 +97,8 @@ import static 
org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; public abstract class TestHiveMetaStore { private static final Logger LOG = LoggerFactory.getLogger(TestHiveMetaStore.class); @@ -3098,4 +3102,55 @@ public void testGetUUIDInParallel() throws Exception { int size = allUuids.size(); assertEquals(numAPICallsPerThread * parallelCalls, size); } + + /** + * While altering partition(s), verify DO NOT calculate partition statistics if + *
<ol>
+ *   <li>table property DO_NOT_UPDATE_STATS is true</li>
+ *   <li>STATS_AUTO_GATHER is false</li>
+ *   <li>Is View</li>
+ * </ol>
+ */ + @Test + public void testUpdatePartitionStat_doesNotUpdateStats() throws Exception { + final String DB_NAME = "db1"; + final String TABLE_NAME = "tbl1"; + Table tbl = new TableBuilder() + .setDbName(DB_NAME) + .setTableName(TABLE_NAME) + .addCol("id", "int") + .addTableParam(StatsSetupConst.DO_NOT_UPDATE_STATS, "true") + .build(null); + List vals = new ArrayList<>(2); + vals.add("col1"); + vals.add("col2"); + Partition part = new Partition(); + part.setDbName(DB_NAME); + part.setTableName(TABLE_NAME); + part.setValues(vals); + part.setParameters(new HashMap<>()); + part.setSd(tbl.getSd().deepCopy()); + part.getSd().setSerdeInfo(tbl.getSd().getSerdeInfo()); + part.getSd().setLocation(tbl.getSd().getLocation() + "/partCol=1"); + Warehouse wh = mock(Warehouse.class); + //Execute initializeAddedPartition() and it should not trigger updatePartitionStatsFast() as DO_NOT_UPDATE_STATS is true + HiveMetaStore.HMSHandler hms = new HiveMetaStore.HMSHandler("", conf, false); + Method m = hms.getClass().getDeclaredMethod("initializeAddedPartition", Table.class, Partition.class, boolean.class); + m.setAccessible(true); + //Invoke initializeAddedPartition(); + m.invoke(hms, tbl, part, false); + verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation()); + + //Remove tbl's DO_NOT_UPDATE_STATS & set STATS_AUTO_GATHER = false + tbl.unsetParameters(); + MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, false); + m.invoke(hms, tbl, part, false); + verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation()); + + //Set STATS_AUTO_GATHER = true and set tbl as a VIRTUAL_VIEW + MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, true); + tbl.setTableType("VIRTUAL_VIEW"); + m.invoke(hms, tbl, part, false); + verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation()); + } }