diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java index eb1e0cc..4dfcd81 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java @@ -33,6 +33,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -47,8 +48,7 @@ // Cache size private final int maxCacheNodes; // Current nodes in the cache - // TODO: Make access threadsafe!!! - private int currentNodes = 0; + private AtomicInteger currentNodes = new AtomicInteger(0); // Run the cleaner thread when the cache is maxFull% full private final float maxFull; // Run the cleaner thread until cache is cleanUntil% occupied @@ -124,8 +124,9 @@ int getMaxCacheNodes() { return maxCacheNodes; } + // Don't want to lock on this, so this may return approximate value int getCurrentNodes() { - return currentNodes; + return currentNodes.intValue(); } int getMaxPartsPerCacheNode() { @@ -265,7 +266,7 @@ private AggrColStatsCached findBestMatch(List partNames, List maxFull) { + if (getCurrentNodes() / maxCacheNodes > maxFull) { clean(); } // Cache key @@ -286,7 +287,7 @@ void add(String dbName, String tblName, String colName, int numPartsCached, nodeList.nodes.add(node); node.updateLastAccessTime(); nodeList.updateLastAccessTime(); - ++currentNodes; + currentNodes.getAndIncrement(); } } catch (InterruptedException e) { LOG.debug(e); @@ -333,7 +334,7 @@ public void run() { // Remove the node if it has expired if (isExpired(node)) { listIterator.remove(); - --currentNodes; + currentNodes.getAndDecrement(); } } } @@ -349,7 +350,7 @@ public void run() { } // If the expired nodes did not result in cache being cleanUntil% in size, // start removing LRU nodes - while (currentNodes / maxCacheNodes > cleanUntil) { + while (getCurrentNodes() / maxCacheNodes > cleanUntil) { evictOneNode(); } } @@ -395,7 +396,7 @@ private void evictOneNode() { // return if (isExpired(candidate)) { iterator.remove(); - --currentNodes; + currentNodes.getAndDecrement(); return; } // Sorry, too many ifs but this form looks optimal @@ -413,7 +414,7 @@ private void evictOneNode() { } } candidateList.nodes.remove(deleteIndex); - --currentNodes; + currentNodes.getAndDecrement(); } } catch (InterruptedException e) { LOG.debug(e); @@ -436,7 +437,7 @@ private boolean isExpired(AggrColStatsCached aggrColStats) { /** * Key object for the stats cache hashtable */ - private static class Key { + static class Key { private final String dbName; private final String tblName; private final String colName; @@ -527,13 +528,6 @@ void updateLastAccessTime() { } /** - * TODO: capture some metrics for the cache - */ - class Metrics { - - } - - /** * Intermediate object, used to collect hits & misses for each cache node that is evaluate for an * incoming request */ @@ -550,6 +544,13 @@ void updateLastAccessTime() { } /** + * TODO: capture some metrics for the cache + */ + class Metrics { + + } + + /** * TODO: implement memory management for the cache */ static class MemoryManager { diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java index e481317..6c288f4 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java @@ -83,6 +83,26 @@ public Result answer(InvocationOnMock invocation) throws Throwable { } }); + Mockito.when(htable.get(Mockito.anyListOf(Get.class))).thenAnswer(new Answer() { + @Override + public Result[] answer(InvocationOnMock invocation) throws Throwable { + @SuppressWarnings("unchecked") + List gets = (List) invocation.getArguments()[0]; + Result[] results = new Result[gets.size()]; + for (int i = 0; i < gets.size(); i++) { + Cell cell = rows.get(new String(gets.get(i).getRow())); + Result result; + if (cell == null) { + result = new Result(); + } else { + result = Result.create(new Cell[]{cell}); + } + results[i] = result; + } + return results; + } + }); + Mockito.when(htable.getScanner(Mockito.any(Scan.class))).thenAnswer(new Answer() { @Override public ResultScanner answer(InvocationOnMock invocation) throws Throwable { diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java index fb36b44..aa2f4a2 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.hbase.AggregateStatsCache.AggrColStatsCached; +import org.apache.hadoop.hive.metastore.hbase.AggregateStatsCache.Key; import org.apache.hadoop.hive.metastore.hbase.utils.BloomFilter; import org.junit.After; import org.junit.AfterClass; @@ -36,19 +37,21 @@ import org.junit.Test; public class TestAggregateStatsCache { + static String DB_NAME = "db"; + static String TAB_PREFIX = "tab"; + static String PART_PREFIX = "part"; + static String COL_PREFIX = "col"; + static int NUM_TABS = 2; + static int NUM_PARTS = 20; + static int NUM_COLS = 5; static int MAX_CACHE_NODES = 10; static int MAX_PARTITIONS_PER_CACHE_NODE = 10; static String TIME_TO_LIVE = "20s"; + static String MAX_WRITER_WAIT = "1s"; + static String MAX_READER_WAIT = "1s"; static float FALSE_POSITIVE_PROBABILITY = (float) 0.01; - static float MAX_VARIANCE = (float) 0.1; + static float MAX_VARIANCE = (float) 0.5; static AggregateStatsCache cache; - static String dbName = "db"; - static String tablePrefix = "tab"; - static String partitionPrefix = "part"; - static String columnPrefix = "col"; - static int numTables = 2; - static int numPartitions = 20; - static int numColumns = 5; static List tables = new ArrayList(); static List tabParts = new ArrayList(); static List tabCols = new ArrayList(); @@ -61,24 +64,24 @@ public static void beforeTest() { initializeColumns(); } + // tab1, tab2 private static void initializeTables() { - for (int i = 1; i <= numTables; i++) { - // tab1, tab2 - tables.add(tablePrefix + i); + for (int i = 1; i <= NUM_TABS; i++) { + tables.add(TAB_PREFIX + i); } } + // part1 ... part20 private static void initializePartitions() { - for (int i = 1; i <= numPartitions; i++) { - // part1 ... part20 - tabParts.add(partitionPrefix + i); + for (int i = 1; i <= NUM_PARTS; i++) { + tabParts.add(PART_PREFIX + i); } } + // col1 ... col5 private static void initializeColumns() { - for (int i = 1; i <= numColumns; i++) { - // part1 ... part20 - tabCols.add(columnPrefix + i); + for (int i = 1; i <= NUM_COLS; i++) { + tabCols.add(COL_PREFIX + i); } } @@ -99,8 +102,9 @@ public void setUp() { hiveConf.setFloatVar(HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE, MAX_VARIANCE); hiveConf.setVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE, TIME_TO_LIVE); + hiveConf.setVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_WRITER_WAIT, MAX_WRITER_WAIT); + hiveConf.setVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_READER_WAIT, MAX_READER_WAIT); cache = AggregateStatsCache.getInstance(hiveConf); - } @After @@ -108,26 +112,33 @@ public void tearDown() { } @Test - public void testBasicAddAndGet() { - // Add a dummy aggregate stats object for parts 1-9 of tab1 for col1 - int minPart = 1; - int maxPart = 9; + public void testCacheKey() { + Key k1 = new Key("db", "tbl1", "col"); + Key k2 = new Key("db", "tbl1", "col"); + // k1 equals k2 + Assert.assertEquals(k1, k2); + Key k3 = new Key("db", "tbl2", "col"); + // k1 not equals k3 + Assert.assertNotEquals(k1, k3); + } + + @Test + public void testBasicAddAndGet() throws Exception { + // Partnames: [tab1part1...tab1part9] + List partNames = preparePartNames(tables.get(0), 1, 9); + // Prepare the bloom filter + BloomFilter bloomFilter = prepareBloomFilter(partNames); + // Add a dummy aggregate stats object for the above parts (part1...part9) of tab1 for col1 String tblName = tables.get(0); String colName = tabCols.get(0); - ColumnStatisticsObj aggrColStats = getDummyLongColStat(colName); - // Prepare the bloom filter - BloomFilter bloomFilter = - new BloomFilter(MAX_PARTITIONS_PER_CACHE_NODE, FALSE_POSITIVE_PROBABILITY); - List partNames = new ArrayList(); - for (int i = minPart; i <= maxPart; i++) { - String partName = tabParts.get(i); - partNames.add(partName); - bloomFilter.addToFilter(partName.getBytes()); - } - // Now add to cache - cache.add(dbName, tblName, colName, maxPart-minPart+1, aggrColStats, bloomFilter); + int highVal = 100, lowVal = 10, numDVs = 50, numNulls = 5; + // We'll treat this as the aggregate col stats for part1...part9 of tab1, col1 + ColumnStatisticsObj aggrColStats = + getDummyLongColStat(colName, highVal, lowVal, numDVs, numNulls); + // Now add to cache the dummy colstats for these 10 partitions + cache.add(DB_NAME, tblName, colName, 10, aggrColStats, bloomFilter); // Now get from cache - AggrColStatsCached aggrStatsCached = cache.get(dbName, tblName, colName, partNames); + AggrColStatsCached aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); Assert.assertNotNull(aggrStatsCached); ColumnStatisticsObj aggrColStatsCached = aggrStatsCached.getColStats(); @@ -139,47 +150,110 @@ public void testBasicAddAndGet() { } @Test - public void testAddGetWithVariance() { - // Add a dummy aggregate stats object for parts 1-9 of tab1 for col1 - int minPart = 1; - int maxPart = 9; + public void testAddGetWithVariance() throws Exception { + // Partnames: [tab1part1...tab1part9] + List partNames = preparePartNames(tables.get(0), 1, 9); + // Prepare the bloom filter + BloomFilter bloomFilter = prepareBloomFilter(partNames); + // Add a dummy aggregate stats object for the above parts (part1...part9) of tab1 for col1 String tblName = tables.get(0); String colName = tabCols.get(0); - ColumnStatisticsObj aggrColStats = getDummyLongColStat(colName); - // Prepare the bloom filter - BloomFilter bloomFilter = - new BloomFilter(MAX_PARTITIONS_PER_CACHE_NODE, FALSE_POSITIVE_PROBABILITY); - // The paritions we'll eventually request from the cache - List partNames = new ArrayList(); - for (int i = minPart-1; i <= maxPart-1; i++) { - String partName = tabParts.get(i); - // Only add 50% partitions to partnames so that we can see if the request fails - if (i < maxPart / 2) { - partNames.add(partName); - } - bloomFilter.addToFilter(partName.getBytes()); - } + int highVal = 100, lowVal = 10, numDVs = 50, numNulls = 5; + // We'll treat this as the aggregate col stats for part1...part9 of tab1, col1 + ColumnStatisticsObj aggrColStats = + getDummyLongColStat(colName, highVal, lowVal, numDVs, numNulls); // Now add to cache - cache.add(dbName, tblName, colName, maxPart-minPart+1, aggrColStats, bloomFilter); - // Now get from cache - AggrColStatsCached aggrStatsCached = cache.get(dbName, tblName, colName, partNames); + cache.add(DB_NAME, tblName, colName, 10, aggrColStats, bloomFilter); + + // Now prepare partnames with only 5 partitions: [tab1part1...tab1part5] + partNames = preparePartNames(tables.get(0), 1, 5); + // This get should fail because its variance ((10-5)/5) is way past MAX_VARIANCE (0.5) + AggrColStatsCached aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); + Assert.assertNull(aggrStatsCached); + + // Now prepare partnames with 10 partitions: [tab1part11...tab1part20], but with no overlap + partNames = preparePartNames(tables.get(0), 11, 20); + // This get should fail because its variance ((10-0)/10) is way past MAX_VARIANCE (0.5) + aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); Assert.assertNull(aggrStatsCached); + + // Now prepare partnames with 9 partitions: [tab1part1...tab1part8], which are contained in the + // object that we added to the cache + partNames = preparePartNames(tables.get(0), 1, 8); + // This get should succeed because its variance ((10-9)/9) is within past MAX_VARIANCE (0.5) + aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); + Assert.assertNotNull(aggrStatsCached); + ColumnStatisticsObj aggrColStatsCached = aggrStatsCached.getColStats(); + Assert.assertEquals(aggrColStats, aggrColStatsCached); } @Test - public void testMultiThreaded() { + public void testTimeToLive() throws Exception { + // Add a dummy node to cache + // Partnames: [tab1part1...tab1part9] + List partNames = preparePartNames(tables.get(0), 1, 9); + // Prepare the bloom filter + BloomFilter bloomFilter = prepareBloomFilter(partNames); + // Add a dummy aggregate stats object for the above parts (part1...part9) of tab1 for col1 + String tblName = tables.get(0); + String colName = tabCols.get(0); + int highVal = 100, lowVal = 10, numDVs = 50, numNulls = 5; + // We'll treat this as the aggregate col stats for part1...part9 of tab1, col1 + ColumnStatisticsObj aggrColStats = + getDummyLongColStat(colName, highVal, lowVal, numDVs, numNulls); + // Now add to cache + cache.add(DB_NAME, tblName, colName, 10, aggrColStats, bloomFilter); + + // Sleep for 30 seconds + Thread.sleep(30000); + + // Get should fail now (since TTL is 20s) and we've snoozed for 30 seconds + AggrColStatsCached aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); + Assert.assertNull(aggrStatsCached); + } + + /** + * Prepares an array of partition names by getting partitions from minPart ... maxPart and + * prepending with table name + * Example: [tab1part1, tab1part2 ...] + * + * @param tabName + * @param minPart + * @param maxPart + * @return + * @throws Exception + */ + private List preparePartNames(String tabName, int minPart, int maxPart) throws Exception { + if ((minPart < 1) || (maxPart > NUM_PARTS)) { + throw new Exception("tabParts does not have these partition numbers"); + } + List partNames = new ArrayList(); + for (int i = minPart; i <= maxPart; i++) { + String partName = tabParts.get(i-1); + partNames.add(tabName + partName); + } + return partNames; + } + + /** + * Prepares a bloom filter from the list of partition names + * @param partNames + * @return + */ + private BloomFilter prepareBloomFilter(List partNames) { + BloomFilter bloomFilter = + new BloomFilter(MAX_PARTITIONS_PER_CACHE_NODE, FALSE_POSITIVE_PROBABILITY); + for (String partName: partNames) { + bloomFilter.addToFilter(partName.getBytes()); + } + return bloomFilter; } - private ColumnStatisticsObj getDummyLongColStat(String colName) { + private ColumnStatisticsObj getDummyLongColStat(String colName, int highVal, int lowVal, int numDVs, int numNulls) { ColumnStatisticsObj aggrColStats = new ColumnStatisticsObj(); aggrColStats.setColName(colName); aggrColStats.setColType("long"); LongColumnStatsData longStatsData = new LongColumnStatsData(); - // Set some random values - int highVal = 100; - int lowVal = 10; - int numDVs = 50; - int numNulls = 5; longStatsData.setHighValue(highVal); longStatsData.setLowValue(lowVal); longStatsData.setNumDVs(numDVs); diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java index df3cd73..92c9ba4 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.HTableInterface; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -49,8 +50,10 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; import org.apache.hadoop.hive.metastore.api.Table; +import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -73,6 +76,51 @@ public class TestHBaseStore { private static final Log LOG = LogFactory.getLog(TestHBaseStore.class.getName()); static Map emptyParameters = new HashMap(); + // Table with NUM_PART_KEYS partitioning keys and NUM_PARTITIONS values per key + static final int NUM_PART_KEYS = 1; + static final int NUM_PARTITIONS = 5; + static final String DB = "db"; + static final String TBL = "tbl"; + static final String COL = "col"; + static final String PART_KEY_PREFIX = "part"; + static final String PART_VAL_PREFIX = "val"; + static final String PART_KV_SEPARATOR = "="; + static final List PART_KEYS = new ArrayList(); + static final List PART_VALS = new ArrayList(); + // Initialize mock partitions + static { + for (int i = 1; i <= NUM_PART_KEYS; i++) { + PART_KEYS.add(PART_KEY_PREFIX + i); + } + for (int i = 1; i <= NUM_PARTITIONS; i++) { + PART_VALS.add(PART_VAL_PREFIX + i); + } + } + static final long DEFAULT_TIME = System.currentTimeMillis(); + static final String BOOLEAN_COL = "boolCol"; + static final String BOOLEAN_TYPE = "boolean"; + static final String LONG_COL = "longCol"; + static final String LONG_TYPE = "long"; + static final String DOUBLE_COL = "doubleCol"; + static final String DOUBLE_TYPE = "double"; + static final String STRING_COL = "stringCol"; + static final String STRING_TYPE = "string"; + static final String BINARY_COL = "binaryCol"; + static final String BINARY_TYPE = "binary"; + static final String DECIMAL_COL = "decimalCol"; + static final String DECIMAL_TYPE = "decimal(5,3)"; + static List booleanColStatsObjs = new ArrayList( + NUM_PARTITIONS); + static List longColStatsObjs = new ArrayList( + NUM_PARTITIONS); + static List doubleColStatsObjs = new ArrayList( + NUM_PARTITIONS); + static List stringColStatsObjs = new ArrayList( + NUM_PARTITIONS); + static List binaryColStatsObjs = new ArrayList( + NUM_PARTITIONS); + static List decimalColStatsObjs = new ArrayList( + NUM_PARTITIONS); @Rule public ExpectedException thrown = ExpectedException.none(); @Mock HTableInterface htable; @@ -80,6 +128,153 @@ HBaseStore store; + @BeforeClass + public static void beforeTest() { + // All data intitializations + populateMockStats(); + } + + private static void populateMockStats() { + ColumnStatisticsObj statsObj; + // Add NUM_PARTITIONS ColumnStatisticsObj of each type + // For aggregate stats test, we'll treat each ColumnStatisticsObj as stats for 1 partition + // For the rest, we'll just pick the 1st ColumnStatisticsObj from this list and use it + for (int i = 0; i < NUM_PARTITIONS; i++) { + statsObj = mockBooleanStats(i); + booleanColStatsObjs.add(statsObj); + statsObj = mockLongStats(i); + longColStatsObjs.add(statsObj); + statsObj = mockDoubleStats(i); + doubleColStatsObjs.add(statsObj); + statsObj = mockStringStats(i); + stringColStatsObjs.add(statsObj); + statsObj = mockBinaryStats(i); + binaryColStatsObjs.add(statsObj); + statsObj = mockDecimalStats(i); + decimalColStatsObjs.add(statsObj); + } + } + + private static ColumnStatisticsObj mockBooleanStats(int i) { + long trues = 37 + 100*i; + long falses = 12 + 50*i; + long nulls = 2 + i; + ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(); + colStatsObj.setColName(BOOLEAN_COL); + colStatsObj.setColType(BOOLEAN_TYPE); + ColumnStatisticsData data = new ColumnStatisticsData(); + BooleanColumnStatsData boolData = new BooleanColumnStatsData(); + boolData.setNumTrues(trues); + boolData.setNumFalses(falses); + boolData.setNumNulls(nulls); + data.setBooleanStats(boolData); + colStatsObj.setStatsData(data); + return colStatsObj; + } + + private static ColumnStatisticsObj mockLongStats(int i) { + long high = 120938479124L + 100*i; + long low = -12341243213412124L - 50*i; + long nulls = 23 + i; + long dVs = 213L + 10*i; + ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(); + colStatsObj.setColName(LONG_COL); + colStatsObj.setColType(LONG_TYPE); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData longData = new LongColumnStatsData(); + longData.setHighValue(high); + longData.setLowValue(low); + longData.setNumNulls(nulls); + longData.setNumDVs(dVs); + data.setLongStats(longData); + colStatsObj.setStatsData(data); + return colStatsObj; + } + + private static ColumnStatisticsObj mockDoubleStats(int i) { + double high = 123423.23423 + 100*i; + double low = 0.00001234233 - 50*i; + long nulls = 92 + i; + long dVs = 1234123421L + 10*i; + ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(); + colStatsObj.setColName(DOUBLE_COL); + colStatsObj.setColType(DOUBLE_TYPE); + ColumnStatisticsData data = new ColumnStatisticsData(); + DoubleColumnStatsData doubleData = new DoubleColumnStatsData(); + doubleData.setHighValue(high); + doubleData.setLowValue(low); + doubleData.setNumNulls(nulls); + doubleData.setNumDVs(dVs); + data.setDoubleStats(doubleData); + colStatsObj.setStatsData(data); + return colStatsObj; + } + + private static ColumnStatisticsObj mockStringStats(int i) { + long maxLen = 1234 + 10*i; + double avgLen = 32.3 + i; + long nulls = 987 + 10*i; + long dVs = 906 + i; + ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(); + colStatsObj.setColName(STRING_COL); + colStatsObj.setColType(STRING_TYPE); + ColumnStatisticsData data = new ColumnStatisticsData(); + StringColumnStatsData stringData = new StringColumnStatsData(); + stringData.setMaxColLen(maxLen); + stringData.setAvgColLen(avgLen); + stringData.setNumNulls(nulls); + stringData.setNumDVs(dVs); + data.setStringStats(stringData); + colStatsObj.setStatsData(data); + return colStatsObj; + } + + private static ColumnStatisticsObj mockBinaryStats(int i) {; + long maxLen = 123412987L + 10*i; + double avgLen = 76.98 + i; + long nulls = 976998797L + 10*i; + ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(); + colStatsObj.setColName(BINARY_COL); + colStatsObj.setColType(BINARY_TYPE); + ColumnStatisticsData data = new ColumnStatisticsData(); + BinaryColumnStatsData binaryData = new BinaryColumnStatsData(); + binaryData.setMaxColLen(maxLen); + binaryData.setAvgColLen(avgLen); + binaryData.setNumNulls(nulls); + data.setBinaryStats(binaryData); + colStatsObj.setStatsData(data); + return colStatsObj; + } + + private static ColumnStatisticsObj mockDecimalStats(int i) { + Decimal high = new Decimal(); + high.setScale((short)3); + String strHigh = String.valueOf(3876 + 100*i); + high.setUnscaled(strHigh.getBytes()); + Decimal low = new Decimal(); + low.setScale((short)3); + String strLow = String.valueOf(38 + i); + low.setUnscaled(strLow.getBytes()); + long nulls = 13 + i; + long dVs = 923947293L + 100*i; + ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(); + colStatsObj.setColName(DECIMAL_COL); + colStatsObj.setColType(DECIMAL_TYPE); + ColumnStatisticsData data = new ColumnStatisticsData(); + DecimalColumnStatsData decimalData = new DecimalColumnStatsData(); + decimalData.setHighValue(high); + decimalData.setLowValue(low); + decimalData.setNumNulls(nulls); + decimalData.setNumDVs(dVs); + data.setDecimalStats(decimalData); + colStatsObj.setStatsData(data); + return colStatsObj; + } + + @AfterClass + public static void afterTest() { + } + @Before public void init() throws IOException { @@ -131,16 +326,15 @@ public void dropDb() throws Exception { @Test public void createFunction() throws Exception { - String dbname = "default"; String funcName = "createfunc"; int now = (int)(System.currentTimeMillis()/ 1000); - Function func = new Function(funcName, dbname, "o.a.h.h.myfunc", "me", PrincipalType.USER, + Function func = new Function(funcName, DB, "o.a.h.h.myfunc", "me", PrincipalType.USER, now, FunctionType.JAVA, Arrays.asList(new ResourceUri(ResourceType.JAR, "file:/tmp/somewhere"))); store.createFunction(func); - Function f = store.getFunction(dbname, funcName); - Assert.assertEquals(dbname, f.getDbName()); + Function f = store.getFunction(DB, funcName); + Assert.assertEquals(DB, f.getDbName()); Assert.assertEquals(funcName, f.getFunctionName()); Assert.assertEquals("o.a.h.h.myfunc", f.getClassName()); Assert.assertEquals("me", f.getOwnerName()); @@ -154,22 +348,21 @@ public void createFunction() throws Exception { @Test public void alterFunction() throws Exception { - String dbname = "default"; String funcName = "alterfunc"; int now = (int)(System.currentTimeMillis()/ 1000); List uris = new ArrayList(); uris.add(new ResourceUri(ResourceType.FILE, "whatever")); - Function func = new Function(funcName, dbname, "o.a.h.h.myfunc", "me", PrincipalType.USER, + Function func = new Function(funcName, DB, "o.a.h.h.myfunc", "me", PrincipalType.USER, now, FunctionType.JAVA, uris); store.createFunction(func); - Function f = store.getFunction(dbname, funcName); + Function f = store.getFunction(DB, funcName); Assert.assertEquals(ResourceType.FILE, f.getResourceUris().get(0).getResourceType()); func.addToResourceUris(new ResourceUri(ResourceType.ARCHIVE, "file")); - store.alterFunction(dbname, funcName, func); + store.alterFunction(DB, funcName, func); - f = store.getFunction(dbname, funcName); + f = store.getFunction(DB, funcName); Assert.assertEquals(2, f.getResourceUrisSize()); Assert.assertEquals(ResourceType.FILE, f.getResourceUris().get(0).getResourceType()); Assert.assertEquals(ResourceType.ARCHIVE, f.getResourceUris().get(1).getResourceType()); @@ -178,19 +371,18 @@ public void alterFunction() throws Exception { @Test public void dropFunction() throws Exception { - String dbname = "default"; String funcName = "delfunc"; int now = (int)(System.currentTimeMillis()/ 1000); - Function func = new Function(funcName, dbname, "o.a.h.h.myfunc", "me", PrincipalType.USER, + Function func = new Function(funcName, DB, "o.a.h.h.myfunc", "me", PrincipalType.USER, now, FunctionType.JAVA, Arrays.asList(new ResourceUri(ResourceType.JAR, "file:/tmp/somewhere"))); store.createFunction(func); - Function f = store.getFunction(dbname, funcName); + Function f = store.getFunction(DB, funcName); Assert.assertNotNull(f); - store.dropFunction(dbname, funcName); + store.dropFunction(DB, funcName); //thrown.expect(NoSuchObjectException.class); - Assert.assertNull(store.getFunction(dbname, funcName)); + Assert.assertNull(store.getFunction(DB, funcName)); } @Test @@ -364,7 +556,6 @@ public void dropTable() throws Exception { @Test public void createPartition() throws Exception { - String dbName = "default"; String tableName = "myparttable"; int startTime = (int)(System.currentTimeMillis() / 1000); List cols = new ArrayList(); @@ -374,18 +565,18 @@ public void createPartition() throws Exception { serde, null, null, emptyParameters); List partCols = new ArrayList(); partCols.add(new FieldSchema("pc", "string", "")); - Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + Table table = new Table(tableName, DB, "me", startTime, startTime, 0, sd, partCols, emptyParameters, null, null, null); store.createTable(table); List vals = Arrays.asList("fred"); StorageDescriptor psd = new StorageDescriptor(sd); psd.setLocation("file:/tmp/pc=fred"); - Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + Partition part = new Partition(vals, DB, tableName, startTime, startTime, psd, emptyParameters); store.addPartition(part); - Partition p = store.getPartition(dbName, tableName, vals); + Partition p = store.getPartition(DB, tableName, vals); Assert.assertEquals(1, p.getSd().getColsSize()); Assert.assertEquals("col1", p.getSd().getCols().get(0).getName()); Assert.assertEquals("int", p.getSd().getCols().get(0).getType()); @@ -395,18 +586,17 @@ public void createPartition() throws Exception { Assert.assertEquals("file:/tmp/pc=fred", p.getSd().getLocation()); Assert.assertEquals("input", p.getSd().getInputFormat()); Assert.assertEquals("output", p.getSd().getOutputFormat()); - Assert.assertEquals(dbName, p.getDbName()); + Assert.assertEquals(DB, p.getDbName()); Assert.assertEquals(tableName, p.getTableName()); Assert.assertEquals(1, p.getValuesSize()); Assert.assertEquals("fred", p.getValues().get(0)); - Assert.assertTrue(store.doesPartitionExist(dbName, tableName, vals)); - Assert.assertFalse(store.doesPartitionExist(dbName, tableName, Arrays.asList("bob"))); + Assert.assertTrue(store.doesPartitionExist(DB, tableName, vals)); + Assert.assertFalse(store.doesPartitionExist(DB, tableName, Arrays.asList("bob"))); } @Test public void alterPartition() throws Exception { - String dbName = "default"; String tableName = "alterparttable"; int startTime = (int)(System.currentTimeMillis() / 1000); List cols = new ArrayList(); @@ -416,21 +606,21 @@ public void alterPartition() throws Exception { serde, null, null, emptyParameters); List partCols = new ArrayList(); partCols.add(new FieldSchema("pc", "string", "")); - Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + Table table = new Table(tableName, DB, "me", startTime, startTime, 0, sd, partCols, emptyParameters, null, null, null); store.createTable(table); List vals = Arrays.asList("fred"); StorageDescriptor psd = new StorageDescriptor(sd); psd.setLocation("file:/tmp/pc=fred"); - Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + Partition part = new Partition(vals, DB, tableName, startTime, startTime, psd, emptyParameters); store.addPartition(part); part.setLastAccessTime(startTime + 10); - store.alterPartition(dbName, tableName, vals, part); + store.alterPartition(DB, tableName, vals, part); - Partition p = store.getPartition(dbName, tableName, vals); + Partition p = store.getPartition(DB, tableName, vals); Assert.assertEquals(1, p.getSd().getColsSize()); Assert.assertEquals("col1", p.getSd().getCols().get(0).getName()); Assert.assertEquals("int", p.getSd().getCols().get(0).getType()); @@ -440,19 +630,18 @@ public void alterPartition() throws Exception { Assert.assertEquals("file:/tmp/pc=fred", p.getSd().getLocation()); Assert.assertEquals("input", p.getSd().getInputFormat()); Assert.assertEquals("output", p.getSd().getOutputFormat()); - Assert.assertEquals(dbName, p.getDbName()); + Assert.assertEquals(DB, p.getDbName()); Assert.assertEquals(tableName, p.getTableName()); Assert.assertEquals(1, p.getValuesSize()); Assert.assertEquals("fred", p.getValues().get(0)); Assert.assertEquals(startTime + 10, p.getLastAccessTime()); - Assert.assertTrue(store.doesPartitionExist(dbName, tableName, vals)); - Assert.assertFalse(store.doesPartitionExist(dbName, tableName, Arrays.asList("bob"))); + Assert.assertTrue(store.doesPartitionExist(DB, tableName, vals)); + Assert.assertFalse(store.doesPartitionExist(DB, tableName, Arrays.asList("bob"))); } @Test public void getPartitions() throws Exception { - String dbName = "default"; String tableName = "manyParts"; int startTime = (int)(System.currentTimeMillis() / 1000); List cols = new ArrayList(); @@ -462,7 +651,7 @@ public void getPartitions() throws Exception { serde, null, null, emptyParameters); List partCols = new ArrayList(); partCols.add(new FieldSchema("pc", "string", "")); - Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + Table table = new Table(tableName, DB, "me", startTime, startTime, 0, sd, partCols, emptyParameters, null, null, null); store.createTable(table); @@ -472,15 +661,15 @@ public void getPartitions() throws Exception { vals.add(val); StorageDescriptor psd = new StorageDescriptor(sd); psd.setLocation("file:/tmp/pc=" + val); - Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + Partition part = new Partition(vals, DB, tableName, startTime, startTime, psd, emptyParameters); store.addPartition(part); - Partition p = store.getPartition(dbName, tableName, vals); + Partition p = store.getPartition(DB, tableName, vals); Assert.assertEquals("file:/tmp/pc=" + val, p.getSd().getLocation()); } - List parts = store.getPartitions(dbName, tableName, -1); + List parts = store.getPartitions(DB, tableName, -1); Assert.assertEquals(5, parts.size()); String[] pv = new String[5]; for (int i = 0; i < 5; i++) pv[i] = parts.get(i).getValues().get(0); @@ -490,7 +679,6 @@ public void getPartitions() throws Exception { @Test public void listGetDropPartitionNames() throws Exception { - String dbName = "default"; String tableName = "listParts"; int startTime = (int)(System.currentTimeMillis() / 1000); List cols = new ArrayList(); @@ -501,7 +689,7 @@ public void listGetDropPartitionNames() throws Exception { List partCols = new ArrayList(); partCols.add(new FieldSchema("pc", "string", "")); partCols.add(new FieldSchema("region", "string", "")); - Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + Table table = new Table(tableName, DB, "me", startTime, startTime, 0, sd, partCols, emptyParameters, null, null, null); store.createTable(table); @@ -511,31 +699,30 @@ public void listGetDropPartitionNames() throws Exception { for (String v : pv) vals.add(v); StorageDescriptor psd = new StorageDescriptor(sd); psd.setLocation("file:/tmp/pc=" + pv[0] + "/region=" + pv[1]); - Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + Partition part = new Partition(vals, DB, tableName, startTime, startTime, psd, emptyParameters); store.addPartition(part); } - List names = store.listPartitionNames(dbName, tableName, (short) -1); + List names = store.listPartitionNames(DB, tableName, (short) -1); Assert.assertEquals(2, names.size()); String[] resultNames = names.toArray(new String[names.size()]); Arrays.sort(resultNames); Assert.assertArrayEquals(resultNames, new String[]{"pc=today/region=north america", "pc=tomorrow/region=europe"}); - List parts = store.getPartitionsByNames(dbName, tableName, names); + List parts = store.getPartitionsByNames(DB, tableName, names); Assert.assertArrayEquals(partVals[0], parts.get(0).getValues().toArray(new String[2])); Assert.assertArrayEquals(partVals[1], parts.get(1).getValues().toArray(new String[2])); - store.dropPartitions(dbName, tableName, names); - List afterDropParts = store.getPartitions(dbName, tableName, -1); + store.dropPartitions(DB, tableName, names); + List afterDropParts = store.getPartitions(DB, tableName, -1); Assert.assertEquals(0, afterDropParts.size()); } @Test public void dropPartition() throws Exception { - String dbName = "default"; String tableName = "myparttable2"; int startTime = (int)(System.currentTimeMillis() / 1000); List cols = new ArrayList(); @@ -545,21 +732,21 @@ public void dropPartition() throws Exception { serde, null, null, emptyParameters); List partCols = new ArrayList(); partCols.add(new FieldSchema("pc", "string", "")); - Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + Table table = new Table(tableName, DB, "me", startTime, startTime, 0, sd, partCols, emptyParameters, null, null, null); store.createTable(table); List vals = Arrays.asList("fred"); StorageDescriptor psd = new StorageDescriptor(sd); psd.setLocation("file:/tmp/pc=fred"); - Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + Partition part = new Partition(vals, DB, tableName, startTime, startTime, psd, emptyParameters); store.addPartition(part); - Assert.assertNotNull(store.getPartition(dbName, tableName, vals)); - store.dropPartition(dbName, tableName, vals); + Assert.assertNotNull(store.getPartition(DB, tableName, vals)); + store.dropPartition(DB, tableName, vals); thrown.expect(NoSuchObjectException.class); - store.getPartition(dbName, tableName, vals); + store.getPartition(DB, tableName, vals); } @Test @@ -588,315 +775,551 @@ public void dropRole() throws Exception { } // Due to the way our mock stuff works, we can only insert one column at a time, so we'll test - // each stat type separately. We'll test them together in hte integration tests. + // each stat type separately. We'll test them together in the integration tests. @Test public void booleanTableStatistics() throws Exception { + // Add a boolean table stats for BOOLEAN_COL to DB // Because of the way our mock implementation works we actually need to not create the table // before we set statistics on it. - long now = System.currentTimeMillis(); - String dbname = "default"; - String tableName = "statstable"; - String boolcol = "boolcol"; - long trues = 37; - long falses = 12; - long booleanNulls = 2; - ColumnStatistics stats = new ColumnStatistics(); - ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); - desc.setLastAnalyzed(now); - desc.setDbName(dbname); - desc.setTableName(tableName); - desc.setIsTblLevel(true); + // Get a default ColumnStatisticsDesc for table level stats + ColumnStatisticsDesc desc = getMockTblColStatsDesc(); stats.setStatsDesc(desc); - - ColumnStatisticsObj obj = new ColumnStatisticsObj(); - obj.setColName(boolcol); - obj.setColType("boolean"); - ColumnStatisticsData data = new ColumnStatisticsData(); - BooleanColumnStatsData boolData = new BooleanColumnStatsData(); - boolData.setNumTrues(trues); - boolData.setNumFalses(falses); - boolData.setNumNulls(booleanNulls); - data.setBooleanStats(boolData); - obj.setStatsData(data); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = booleanColStatsObjs.get(0); + BooleanColumnStatsData boolData = obj.getStatsData().getBooleanStats(); + // Add to DB stats.addToStatsObj(obj); - store.updateTableColumnStatistics(stats); - - stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(boolcol)); - Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); - Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); - Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); - Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); - - Assert.assertEquals(1, stats.getStatsObjSize()); - ColumnStatisticsData colData = obj.getStatsData(); - Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, colData.getSetField()); - boolData = colData.getBooleanStats(); - Assert.assertEquals(trues, boolData.getNumTrues()); - Assert.assertEquals(falses, boolData.getNumFalses()); - Assert.assertEquals(booleanNulls, boolData.getNumNulls()); + // Get from DB + ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(BOOLEAN_COL)); + // Compare ColumnStatisticsDesc + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName()); + Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, dataFromDB.getSetField()); + // Compare BooleanColumnStatsData + BooleanColumnStatsData boolDataFromDB = dataFromDB.getBooleanStats(); + Assert.assertEquals(boolData.getNumTrues(), boolDataFromDB.getNumTrues()); + Assert.assertEquals(boolData.getNumFalses(), boolDataFromDB.getNumFalses()); + Assert.assertEquals(boolData.getNumNulls(), boolDataFromDB.getNumNulls()); } @Test public void longTableStatistics() throws Exception { + // Add a long table stats for LONG_COL to DB // Because of the way our mock implementation works we actually need to not create the table // before we set statistics on it. - long now = System.currentTimeMillis(); - String dbname = "default"; - String tableName = "statstable"; - String longcol = "longcol"; - long longHigh = 120938479124L; - long longLow = -12341243213412124L; - long longNulls = 23; - long longDVs = 213L; - ColumnStatistics stats = new ColumnStatistics(); - ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); - desc.setLastAnalyzed(now); - desc.setDbName(dbname); - desc.setTableName(tableName); - desc.setIsTblLevel(true); + // Get a default ColumnStatisticsDesc for table level stats + ColumnStatisticsDesc desc = getMockTblColStatsDesc(); stats.setStatsDesc(desc); - - ColumnStatisticsObj obj = new ColumnStatisticsObj(); - obj.setColName(longcol); - obj.setColType("long"); - ColumnStatisticsData data = new ColumnStatisticsData(); - LongColumnStatsData longData = new LongColumnStatsData(); - longData.setHighValue(longHigh); - longData.setLowValue(longLow); - longData.setNumNulls(longNulls); - longData.setNumDVs(longDVs); - data.setLongStats(longData); - obj.setStatsData(data); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = longColStatsObjs.get(0); + LongColumnStatsData longData = obj.getStatsData().getLongStats(); + // Add to DB stats.addToStatsObj(obj); - store.updateTableColumnStatistics(stats); - - stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(longcol)); - Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); - Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); - Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); - Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); - - Assert.assertEquals(1, stats.getStatsObjSize()); - ColumnStatisticsData colData = obj.getStatsData(); - Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, colData.getSetField()); - longData = colData.getLongStats(); - Assert.assertEquals(longHigh, longData.getHighValue()); - Assert.assertEquals(longLow, longData.getLowValue()); - Assert.assertEquals(longNulls, longData.getNumNulls()); - Assert.assertEquals(longDVs, longData.getNumDVs()); + // Get from DB + ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(LONG_COL)); + // Compare ColumnStatisticsDesc + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName()); + Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField()); + // Compare LongColumnStatsData + LongColumnStatsData longDataFromDB = dataFromDB.getLongStats(); + Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue()); + Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue()); + Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls()); + Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs()); } @Test public void doubleTableStatistics() throws Exception { + // Add a double table stats for DOUBLE_COL to DB // Because of the way our mock implementation works we actually need to not create the table // before we set statistics on it. - long now = System.currentTimeMillis(); - String dbname = "default"; - String tableName = "statstable"; - String doublecol = "doublecol"; - double doubleHigh = 123423.23423; - double doubleLow = 0.00001234233; - long doubleNulls = 92; - long doubleDVs = 1234123421L; - ColumnStatistics stats = new ColumnStatistics(); - ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); - desc.setLastAnalyzed(now); - desc.setDbName(dbname); - desc.setTableName(tableName); - desc.setIsTblLevel(true); + // Get a default ColumnStatisticsDesc for table level stats + ColumnStatisticsDesc desc = getMockTblColStatsDesc(); stats.setStatsDesc(desc); - - ColumnStatisticsObj obj = new ColumnStatisticsObj(); - obj.setColName(doublecol); - obj.setColType("double"); - ColumnStatisticsData data = new ColumnStatisticsData(); - DoubleColumnStatsData doubleData = new DoubleColumnStatsData(); - doubleData.setHighValue(doubleHigh); - doubleData.setLowValue(doubleLow); - doubleData.setNumNulls(doubleNulls); - doubleData.setNumDVs(doubleDVs); - data.setDoubleStats(doubleData); - obj.setStatsData(data); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = doubleColStatsObjs.get(0); + DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats(); + // Add to DB stats.addToStatsObj(obj); - store.updateTableColumnStatistics(stats); - - stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(doublecol)); - Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); - Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); - Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); - Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); - - Assert.assertEquals(1, stats.getStatsObjSize()); - ColumnStatisticsData colData = obj.getStatsData(); - Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, colData.getSetField()); - doubleData = colData.getDoubleStats(); - Assert.assertEquals(doubleHigh, doubleData.getHighValue(), 0.01); - Assert.assertEquals(doubleLow, doubleData.getLowValue(), 0.01); - Assert.assertEquals(doubleNulls, doubleData.getNumNulls()); - Assert.assertEquals(doubleDVs, doubleData.getNumDVs()); + // Get from DB + ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DOUBLE_COL)); + // Compare ColumnStatisticsDesc + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName()); + Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, dataFromDB.getSetField()); + // Compare DoubleColumnStatsData + DoubleColumnStatsData doubleDataFromDB = dataFromDB.getDoubleStats(); + Assert.assertEquals(doubleData.getHighValue(), doubleDataFromDB.getHighValue(), 0.01); + Assert.assertEquals(doubleData.getLowValue(), doubleDataFromDB.getLowValue(), 0.01); + Assert.assertEquals(doubleData.getNumNulls(), doubleDataFromDB.getNumNulls()); + Assert.assertEquals(doubleData.getNumDVs(), doubleDataFromDB.getNumDVs()); } @Test public void stringTableStatistics() throws Exception { + // Add a string table stats for STRING_COL to DB // Because of the way our mock implementation works we actually need to not create the table // before we set statistics on it. - long now = System.currentTimeMillis(); - String dbname = "default"; - String tableName = "statstable"; - String stringcol = "stringcol"; - long strMaxLen = 1234; - double strAvgLen = 32.3; - long strNulls = 987; - long strDVs = 906; - ColumnStatistics stats = new ColumnStatistics(); - ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); - desc.setLastAnalyzed(now); - desc.setDbName(dbname); - desc.setTableName(tableName); - desc.setIsTblLevel(true); + // Get a default ColumnStatisticsDesc for table level stats + ColumnStatisticsDesc desc = getMockTblColStatsDesc(); stats.setStatsDesc(desc); - - ColumnStatisticsObj obj = new ColumnStatisticsObj(); - obj.setColName(stringcol); - obj.setColType("string"); - ColumnStatisticsData data = new ColumnStatisticsData(); - StringColumnStatsData strData = new StringColumnStatsData(); - strData.setMaxColLen(strMaxLen); - strData.setAvgColLen(strAvgLen); - strData.setNumNulls(strNulls); - strData.setNumDVs(strDVs); - data.setStringStats(strData); - obj.setStatsData(data); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = stringColStatsObjs.get(0); + StringColumnStatsData stringData = obj.getStatsData().getStringStats(); + // Add to DB stats.addToStatsObj(obj); - store.updateTableColumnStatistics(stats); - - stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(stringcol)); - Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); - Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); - Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); - Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); - - Assert.assertEquals(1, stats.getStatsObjSize()); - ColumnStatisticsData colData = obj.getStatsData(); - Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, colData.getSetField()); - strData = colData.getStringStats(); - Assert.assertEquals(strMaxLen, strData.getMaxColLen()); - Assert.assertEquals(strAvgLen, strData.getAvgColLen(), 0.01); - Assert.assertEquals(strNulls, strData.getNumNulls()); - Assert.assertEquals(strDVs, strData.getNumDVs()); + // Get from DB + ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(STRING_COL)); + // Compare ColumnStatisticsDesc + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName()); + Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField()); + // Compare StringColumnStatsData + StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats(); + Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen()); + Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01); + Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls()); + Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs()); } @Test public void binaryTableStatistics() throws Exception { + // Add a binary table stats for BINARY_COL to DB // Because of the way our mock implementation works we actually need to not create the table // before we set statistics on it. - long now = System.currentTimeMillis(); - String dbname = "default"; - String tableName = "statstable"; - String binarycol = "bincol"; - long binMaxLen = 123412987L; - double binAvgLen = 76.98; - long binNulls = 976998797L; - ColumnStatistics stats = new ColumnStatistics(); - ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); - desc.setLastAnalyzed(now); - desc.setDbName(dbname); - desc.setTableName(tableName); - desc.setIsTblLevel(true); + // Get a default ColumnStatisticsDesc for table level stats + ColumnStatisticsDesc desc = getMockTblColStatsDesc(); stats.setStatsDesc(desc); - - ColumnStatisticsObj obj = new ColumnStatisticsObj(); - obj.setColName(binarycol); - obj.setColType("binary"); - ColumnStatisticsData data = new ColumnStatisticsData(); - BinaryColumnStatsData binData = new BinaryColumnStatsData(); - binData.setMaxColLen(binMaxLen); - binData.setAvgColLen(binAvgLen); - binData.setNumNulls(binNulls); - data.setBinaryStats(binData); - obj.setStatsData(data); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = binaryColStatsObjs.get(0); + BinaryColumnStatsData binaryData = obj.getStatsData().getBinaryStats(); + // Add to DB stats.addToStatsObj(obj); + store.updateTableColumnStatistics(stats); + // Get from DB + ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(BINARY_COL)); + // Compare ColumnStatisticsDesc + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName()); + Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.BINARY_STATS, dataFromDB.getSetField()); + // Compare BinaryColumnStatsData + BinaryColumnStatsData binaryDataFromDB = dataFromDB.getBinaryStats(); + Assert.assertEquals(binaryData.getMaxColLen(), binaryDataFromDB.getMaxColLen()); + Assert.assertEquals(binaryData.getAvgColLen(), binaryDataFromDB.getAvgColLen(), 0.01); + Assert.assertEquals(binaryData.getNumNulls(), binaryDataFromDB.getNumNulls()); + } + @Test + public void decimalTableStatistics() throws Exception { + // Add a decimal table stats for DECIMAL_COL to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. + ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for table level stats + ColumnStatisticsDesc desc = getMockTblColStatsDesc(); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = decimalColStatsObjs.get(0); + DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats(); + // Add to DB + stats.addToStatsObj(obj); store.updateTableColumnStatistics(stats); + // Get from DB + ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DECIMAL_COL)); + // Compare ColumnStatisticsDesc + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName()); + Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField()); + // Compare DecimalColumnStatsData + DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats(); + Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue()); + Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue()); + Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls()); + Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs()); + } - stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(binarycol)); - Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); - Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); - Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); - Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); + @Test + public void booleanPartitionStatistics() throws Exception { + // Add partition stats for: BOOLEAN_COL and partition: {PART_KEYS(0), PART_VALS(0)} to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. + ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for partition level stats + ColumnStatisticsDesc desc = getMockPartColStatsDesc(0, 0); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = booleanColStatsObjs.get(0); + BooleanColumnStatsData boolData = obj.getStatsData().getBooleanStats(); + // Add to DB + stats.addToStatsObj(obj); + List parVals = new ArrayList(); + parVals.add(PART_VALS.get(0)); + store.updatePartitionColumnStatistics(stats, parVals); + // Get from DB + List partNames = new ArrayList(); + partNames.add(desc.getPartName()); + List colNames = new ArrayList(); + colNames.add(obj.getColName()); + List statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames); + // Compare ColumnStatisticsDesc + Assert.assertEquals(1, statsFromDB.size()); + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName()); + Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, dataFromDB.getSetField()); + // Compare BooleanColumnStatsData + BooleanColumnStatsData boolDataFromDB = dataFromDB.getBooleanStats(); + Assert.assertEquals(boolData.getNumTrues(), boolDataFromDB.getNumTrues()); + Assert.assertEquals(boolData.getNumFalses(), boolDataFromDB.getNumFalses()); + Assert.assertEquals(boolData.getNumNulls(), boolDataFromDB.getNumNulls()); + } - Assert.assertEquals(1, stats.getStatsObjSize()); - ColumnStatisticsData colData = obj.getStatsData(); - Assert.assertEquals(ColumnStatisticsData._Fields.BINARY_STATS, colData.getSetField()); - binData = colData.getBinaryStats(); - Assert.assertEquals(binMaxLen, binData.getMaxColLen()); - Assert.assertEquals(binAvgLen, binData.getAvgColLen(), 0.01); - Assert.assertEquals(binNulls, binData.getNumNulls()); + @Test + public void longPartitionStatistics() throws Exception { + // Add partition stats for: LONG_COL and partition: {PART_KEYS(0), PART_VALS(0)} to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. + ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for partition level stats + ColumnStatisticsDesc desc = getMockPartColStatsDesc(0, 0); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = longColStatsObjs.get(0); + LongColumnStatsData longData = obj.getStatsData().getLongStats(); + // Add to DB + stats.addToStatsObj(obj); + List parVals = new ArrayList(); + parVals.add(PART_VALS.get(0)); + store.updatePartitionColumnStatistics(stats, parVals); + // Get from DB + List partNames = new ArrayList(); + partNames.add(desc.getPartName()); + List colNames = new ArrayList(); + colNames.add(obj.getColName()); + List statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames); + // Compare ColumnStatisticsDesc + Assert.assertEquals(1, statsFromDB.size()); + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName()); + Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField()); + // Compare LongColumnStatsData + LongColumnStatsData longDataFromDB = dataFromDB.getLongStats(); + Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue()); + Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue()); + Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls()); + Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs()); } @Test - public void decimalTableStatistics() throws Exception { + public void doublePartitionStatistics() throws Exception { + // Add partition stats for: DOUBLE_COL and partition: {PART_KEYS(0), PART_VALS(0)} to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. + ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for partition level stats + ColumnStatisticsDesc desc = getMockPartColStatsDesc(0, 0); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = doubleColStatsObjs.get(0); + DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats(); + // Add to DB + stats.addToStatsObj(obj); + List parVals = new ArrayList(); + parVals.add(PART_VALS.get(0)); + store.updatePartitionColumnStatistics(stats, parVals); + // Get from DB + List partNames = new ArrayList(); + partNames.add(desc.getPartName()); + List colNames = new ArrayList(); + colNames.add(obj.getColName()); + List statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames); + // Compare ColumnStatisticsDesc + Assert.assertEquals(1, statsFromDB.size()); + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName()); + Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, dataFromDB.getSetField()); + // Compare DoubleColumnStatsData + DoubleColumnStatsData doubleDataFromDB = dataFromDB.getDoubleStats(); + Assert.assertEquals(doubleData.getHighValue(), doubleDataFromDB.getHighValue(), 0.01); + Assert.assertEquals(doubleData.getLowValue(), doubleDataFromDB.getLowValue(), 0.01); + Assert.assertEquals(doubleData.getNumNulls(), doubleDataFromDB.getNumNulls()); + Assert.assertEquals(doubleData.getNumDVs(), doubleDataFromDB.getNumDVs()); + } + + @Test + public void stringPartitionStatistics() throws Exception { + // Add partition stats for: STRING_COL and partition: {PART_KEYS(0), PART_VALS(0)} to DB // Because of the way our mock implementation works we actually need to not create the table // before we set statistics on it. - long now = System.currentTimeMillis(); - String dbname = "default"; - String tableName = "statstable"; - String decimalcol = "deccol"; - Decimal decHigh = new Decimal(); - decHigh.setScale((short)3); - decHigh.setUnscaled("3876".getBytes()); // I have not clue how this is translated, but it - // doesn't matter - Decimal decLow = new Decimal(); - decLow.setScale((short)3); - decLow.setUnscaled("38".getBytes()); - long decNulls = 13; - long decDVs = 923947293L; + ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for partition level stats + ColumnStatisticsDesc desc = getMockPartColStatsDesc(0, 0); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = stringColStatsObjs.get(0); + StringColumnStatsData stringData = obj.getStatsData().getStringStats(); + // Add to DB + stats.addToStatsObj(obj); + List parVals = new ArrayList(); + parVals.add(PART_VALS.get(0)); + store.updatePartitionColumnStatistics(stats, parVals); + // Get from DB + List partNames = new ArrayList(); + partNames.add(desc.getPartName()); + List colNames = new ArrayList(); + colNames.add(obj.getColName()); + List statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames); + // Compare ColumnStatisticsDesc + Assert.assertEquals(1, statsFromDB.size()); + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName()); + Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField()); + // Compare StringColumnStatsData + StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats(); + Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen()); + Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01); + Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls()); + Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs()); + } + @Test + public void binaryPartitionStatistics() throws Exception { + // Add partition stats for: BINARY_COL and partition: {PART_KEYS(0), PART_VALS(0)} to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. ColumnStatistics stats = new ColumnStatistics(); - ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); - desc.setLastAnalyzed(now); - desc.setDbName(dbname); - desc.setTableName(tableName); - desc.setIsTblLevel(true); + // Get a default ColumnStatisticsDesc for partition level stats + ColumnStatisticsDesc desc = getMockPartColStatsDesc(0, 0); stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = binaryColStatsObjs.get(0); + BinaryColumnStatsData binaryData = obj.getStatsData().getBinaryStats(); + // Add to DB + stats.addToStatsObj(obj); + List parVals = new ArrayList(); + parVals.add(PART_VALS.get(0)); + store.updatePartitionColumnStatistics(stats, parVals); + // Get from DB + List partNames = new ArrayList(); + partNames.add(desc.getPartName()); + List colNames = new ArrayList(); + colNames.add(obj.getColName()); + List statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames); + // Compare ColumnStatisticsDesc + Assert.assertEquals(1, statsFromDB.size()); + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName()); + Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.BINARY_STATS, dataFromDB.getSetField()); + // Compare BinaryColumnStatsData + BinaryColumnStatsData binaryDataFromDB = dataFromDB.getBinaryStats(); + Assert.assertEquals(binaryData.getMaxColLen(), binaryDataFromDB.getMaxColLen()); + Assert.assertEquals(binaryData.getAvgColLen(), binaryDataFromDB.getAvgColLen(), 0.01); + Assert.assertEquals(binaryData.getNumNulls(), binaryDataFromDB.getNumNulls()); + } - ColumnStatisticsObj obj = new ColumnStatisticsObj(); - obj.setColName(decimalcol); - obj.setColType("decimal(5,3)"); - ColumnStatisticsData data = new ColumnStatisticsData(); - DecimalColumnStatsData decData = new DecimalColumnStatsData(); - decData.setHighValue(decHigh); - decData.setLowValue(decLow); - decData.setNumNulls(decNulls); - decData.setNumDVs(decDVs); - data.setDecimalStats(decData); - obj.setStatsData(data); + @Test + public void decimalPartitionStatistics() throws Exception { + // Add partition stats for: DECIMAL_COL and partition: {PART_KEYS(0), PART_VALS(0)} to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. + ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for partition level stats + ColumnStatisticsDesc desc = getMockPartColStatsDesc(0, 0); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = decimalColStatsObjs.get(0); + DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats(); + // Add to DB stats.addToStatsObj(obj); + List parVals = new ArrayList(); + parVals.add(PART_VALS.get(0)); + store.updatePartitionColumnStatistics(stats, parVals); + // Get from DB + List partNames = new ArrayList(); + partNames.add(desc.getPartName()); + List colNames = new ArrayList(); + colNames.add(obj.getColName()); + List statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames); + // Compare ColumnStatisticsDesc + Assert.assertEquals(1, statsFromDB.size()); + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName()); + Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField()); + // Compare DecimalColumnStatsData + DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats(); + Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue()); + Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue()); + Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls()); + Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs()); + } - store.updateTableColumnStatistics(stats); + // TODO: Activate this test, when we are able to mock the HBaseReadWrite.NO_CACHE_CONF set to false + // Right now, I have tested this by using aggrStatsCache despite NO_CACHE_CONF set to true + // Also need to add tests for other data types + refactor a lot of duplicate code in stats testing + //@Test + public void AggrStats() throws Exception { + int numParts = 3; + ColumnStatistics stats; + ColumnStatisticsDesc desc; + ColumnStatisticsObj obj; + List partNames = new ArrayList(); + List colNames = new ArrayList(); + colNames.add(BOOLEAN_COL); + // Add boolean col stats to DB for numParts partitions: + // PART_VALS(0), PART_VALS(1) & PART_VALS(2) for PART_KEYS(0) + for (int i = 0; i < numParts; i++) { + stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for partition level stats + desc = getMockPartColStatsDesc(0, i); + stats.setStatsDesc(desc); + partNames.add(desc.getPartName()); + // Get one of the pre-created ColumnStatisticsObj + obj = booleanColStatsObjs.get(i); + stats.addToStatsObj(obj); + // Add to DB + List parVals = new ArrayList(); + parVals.add(PART_VALS.get(i)); + store.updatePartitionColumnStatistics(stats, parVals); + } + // Read aggregate stats + AggrStats aggrStatsFromDB = store.get_aggr_stats_for(DB, TBL, partNames, colNames); + // Verify + Assert.assertEquals(1, aggrStatsFromDB.getColStatsSize()); + ColumnStatisticsObj objFromDB = aggrStatsFromDB.getColStats().get(0); + Assert.assertNotNull(objFromDB); + // Aggregate our mock values + long numTrues = 0, numFalses = 0, numNulls = 0; + BooleanColumnStatsData boolData;; + for (int i = 0; i < numParts; i++) { + boolData = booleanColStatsObjs.get(i).getStatsData().getBooleanStats(); + numTrues = numTrues + boolData.getNumTrues(); + numFalses = numFalses + boolData.getNumFalses(); + numNulls = numNulls + boolData.getNumNulls(); + } + // Compare with what we got from the method call + BooleanColumnStatsData boolDataFromDB = objFromDB.getStatsData().getBooleanStats(); + Assert.assertEquals(numTrues, boolDataFromDB.getNumTrues()); + Assert.assertEquals(numFalses, boolDataFromDB.getNumFalses()); + Assert.assertEquals(numNulls, boolDataFromDB.getNumNulls()); + } + + /** + * Returns a dummy table level ColumnStatisticsDesc with default values + */ + private ColumnStatisticsDesc getMockTblColStatsDesc() { + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(DEFAULT_TIME); + desc.setDbName(DB); + desc.setTableName(TBL); + desc.setIsTblLevel(true); + return desc; + } - stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(decimalcol)); - Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); - Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); - Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); - Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); - - Assert.assertEquals(1, stats.getStatsObjSize()); - ColumnStatisticsData colData = obj.getStatsData(); - Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, colData.getSetField()); - decData = colData.getDecimalStats(); - Assert.assertEquals(decHigh, decData.getHighValue()); - Assert.assertEquals(decLow, decData.getLowValue()); - Assert.assertEquals(decNulls, decData.getNumNulls()); - Assert.assertEquals(decDVs, decData.getNumDVs()); + /** + * Returns a dummy partition level ColumnStatisticsDesc + */ + private ColumnStatisticsDesc getMockPartColStatsDesc(int partKeyIndex, int partValIndex) { + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(DEFAULT_TIME); + desc.setDbName(DB); + desc.setTableName(TBL); + // part1=val1 + desc.setPartName(PART_KEYS.get(partKeyIndex) + PART_KV_SEPARATOR + PART_VALS.get(partValIndex)); + desc.setIsTblLevel(false); + return desc; } + }