diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index 06061c0..c69e7e6 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -40,6 +40,7 @@
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
@@ -1396,6 +1397,63 @@ public void testSimpleTable() throws Exception {
     }
   }
 
+  // Tests that in the absence of stats for partitions, and/or absence of columns
+  // to get stats for, the metastore does not break. See HIVE-12083 for motivation.
+  // Covers all four combinations of empty/non-empty column-name and partition-name lists.
+  public void testStatsFastTrivial() throws Throwable {
+    String dbName = "tstatsfast";
+    String tblName = "t1";
+    String typeName = "Person";
+
+    cleanUp(dbName, tblName, typeName);
+
+    // Four partitions are created below; no column statistics are ever written for them.
+    List<List<String>> values = new ArrayList<List<String>>();
+    values.add(makeVals("2008-07-01 14:13:12", "14"));
+    values.add(makeVals("2008-07-01 14:13:12", "15"));
+    values.add(makeVals("2008-07-02 14:13:12", "15"));
+    values.add(makeVals("2008-07-03 14:13:12", "151"));
+
+    createMultiPartitionTableSchema(dbName, tblName, typeName, values);
+
+    List<String> emptyColNames = new ArrayList<String>();
+    List<String> emptyPartNames = new ArrayList<String>();
+
+    List<String> colNames = new ArrayList<String>();
+    colNames.add("name");
+    colNames.add("income");
+    List<String> partNames = client.listPartitionNames(dbName, tblName, (short) -1);
+
+    // Sanity-check the fixtures before exercising the stats API.
+    assertEquals(0, emptyColNames.size());
+    assertEquals(0, emptyPartNames.size());
+    assertEquals(2, colNames.size());
+    assertEquals(4, partNames.size());
+
+    // Test for both colNames and partNames being empty:
+    AggrStats aggrStatsEmpty = client.getAggrColStatsFor(dbName, tblName, emptyColNames, emptyPartNames);
+    assertNull(aggrStatsEmpty); // will be null as the client short-circuits calling
+
+    // Test for only colNames being empty
+    AggrStats aggrStatsOnlyParts = client.getAggrColStatsFor(dbName, tblName, emptyColNames, partNames);
+    assertNull(aggrStatsOnlyParts); // will be null as the client short-circuits calling
+
+    // Test for only partNames being empty
+    AggrStats aggrStatsOnlyCols = client.getAggrColStatsFor(dbName, tblName, colNames, emptyPartNames);
+    assertNotNull(aggrStatsOnlyCols); // will not be short-circuited on client-side, but should be empty from MS return
+    assertEquals(0, aggrStatsOnlyCols.getPartsFound());
+    assertNotNull(aggrStatsOnlyCols.getColStats());
+    // assertTrue, not the `assert` keyword: the keyword is skipped unless the JVM runs with -ea.
+    assertTrue(aggrStatsOnlyCols.getColStats().isEmpty());
+
+    // Test for valid values for both.
+    AggrStats aggrStatsFull = client.getAggrColStatsFor(dbName, tblName, colNames, partNames);
+    assertNotNull(aggrStatsFull);
+    assertEquals(0, aggrStatsFull.getPartsFound()); // would still be empty, because no stats are actually populated.
+    assertNotNull(aggrStatsFull.getColStats());
+    assertTrue(aggrStatsFull.getColStats().isEmpty());
+  }
+
   public void testColumnStatistics() throws Throwable {
 
     String dbName = "columnstatstestdb";
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 3455a92..698ee46 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -1150,7 +1150,10 @@ public ColumnStatistics getTableStats(
 
   public AggrStats aggrColStatsForPartitions(String dbName, String tableName,
       List<String> partNames, List<String> colNames, boolean useDensityFunctionForNDVEstimation) throws MetaException {
-    if (colNames.isEmpty() || partNames.isEmpty()) return new AggrStats(); // Nothing to aggregate.
+    if (colNames.isEmpty() || partNames.isEmpty()) {
+      LOG.debug("Columns is empty or partNames is empty : Short-circuiting stats eval");
+      return new AggrStats(new ArrayList<ColumnStatisticsObj>(),0); // Nothing to aggregate
+    }
     long partsFound = partsFoundForPartitions(dbName, tableName, partNames, colNames);
     List<ColumnStatisticsObj> colStatsList;
     // Try to read from the cache first