commit 137973bd7c0d9975d27fef46421283ff90bfc652 Author: Alice Fan Date: Mon Aug 13 11:58:18 2018 -0700 HIVE-20246 : Configurable collecting stats by using DO_NOT_UPDATE_STATS table property diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index a53d4be03d..4e04233e4b 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -3173,8 +3173,7 @@ private Partition append_partition_common(RawStore ms, String catName, String db part.setCreateTime((int) time); part.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(time)); - if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreUtils.isView(tbl)) { + if (canUpdateStats(tbl)) { MetaStoreUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, envContext, true); } @@ -3790,6 +3789,20 @@ private boolean createLocationForAddedPartition( return result; } + private boolean canUpdateStats(Table tbl) { + Map tblParams = tbl.getParameters(); + boolean updateStatsTbl = true; + if ((tblParams != null) && tblParams.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)) { + updateStatsTbl = !Boolean.valueOf(tblParams.get(StatsSetupConst.DO_NOT_UPDATE_STATS)); + } + if (!MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) || + MetaStoreUtils.isView(tbl) || + !updateStatsTbl) { + return false; + } + return true; + } + private void initializeAddedPartition( final Table tbl, final Partition part, boolean madeDir) throws MetaException { initializeAddedPartition(tbl, new PartitionSpecProxy.SimplePartitionWrapperIterator(part), madeDir); @@ -3797,8 +3810,7 @@ private void initializeAddedPartition( private void initializeAddedPartition( final Table tbl, final 
PartitionSpecProxy.PartitionIterator part, boolean madeDir) throws MetaException { - if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) && - !MetaStoreUtils.isView(tbl)) { + if (canUpdateStats(tbl)) { MetaStoreUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, null, true); } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java index 75ab80b439..fbe72ad931 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.metastore; -import java.lang.reflect.Field; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; @@ -38,6 +37,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.lang.reflect.*; +import static org.mockito.Mockito.mock; import com.google.common.collect.Sets; import org.apache.hadoop.hive.metastore.api.CreationMetadata; @@ -57,6 +58,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; @@ -96,6 +98,8 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; public abstract class TestHiveMetaStore { private static final Logger LOG = 
LoggerFactory.getLogger(TestHiveMetaStore.class); @@ -3099,4 +3103,60 @@ public void testGetUUIDInParallel() throws Exception { int size = allUuids.size(); assertEquals(numAPICallsPerThread * parallelCalls, size); } + + /** + * While altering partition(s), verify DO NOT calculate partition statistics if + *
<ol>
+ *   <li>table property DO_NOT_UPDATE_STATS is true</li>
+ *   <li>STATS_AUTO_GATHER is false</li>
+ *   <li>Is View</li>
+ * </ol>
+ * @throws SecurityException + * @throws NoSuchMethodException + * @throws InvocationTargetException + * @throws IllegalArgumentException + * @throws IllegalAccessException + */ + @Test + public void testUpdatePartitionStat_doesNotUpdateStats() throws TException, NoSuchMethodException, SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException { + final String DB_NAME = "db1"; + final String TABLE_NAME = "tbl1"; + Table tbl = new TableBuilder() + .setDbName(DB_NAME) + .setTableName(TABLE_NAME) + .addCol("id", "int") + .addTableParam(StatsSetupConst.DO_NOT_UPDATE_STATS, "true") + .build(null); + List vals = new ArrayList<>(2); + vals.add("col1"); + vals.add("col2"); + Partition part = new Partition(); + part.setDbName(DB_NAME); + part.setTableName(TABLE_NAME); + part.setValues(vals); + part.setParameters(new HashMap<>()); + part.setSd(tbl.getSd().deepCopy()); + part.getSd().setSerdeInfo(tbl.getSd().getSerdeInfo()); + part.getSd().setLocation(tbl.getSd().getLocation() + "/"); + Warehouse wh = mock(Warehouse.class); + //Execute initializeAddedPartition() and it should not trigger updatePartitionStatsFast() as DO_NOT_UPDATE_STATS is true + HiveMetaStore.HMSHandler hms = new HiveMetaStore.HMSHandler("", conf, false); + Method m = hms.getClass().getDeclaredMethod("initializeAddedPartition", Table.class, Partition.class, boolean.class); + m.setAccessible(true); + //Invoke initializeAddedPartition(); + m.invoke(hms, tbl, part, false); + verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation()); + + //Remove tbl's DO_NOT_UPDATE_STATS & set STATS_AUTO_GATHER = false + tbl.unsetParameters(); + MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, false); + m.invoke(hms, tbl, part, false); + verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation()); + + //Set STATS_AUTO_GATHER = true and set tbl as a VIRTUAL_VIEW + MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, true); + 
tbl.setTableType("VIRTUAL_VIEW"); + m.invoke(hms, tbl, part, false); + verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation()); + } }