diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java index 21d1b46..7a2c2f0 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java @@ -1499,28 +1499,28 @@ public void testStatsFastTrivial() throws Throwable { assertEquals(4,partNames.size()); // Test for both colNames and partNames being empty: - AggrStats aggrStatsEmpty = client.getAggrColStatsFor(dbName,tblName,emptyColNames,emptyPartNames); + AggrStats aggrStatsEmpty = client.getAggrColStatsFor(dbName,tblName,emptyColNames,emptyPartNames, -1); assertNotNull(aggrStatsEmpty); // short-circuited on client-side, verifying that it's an empty object, not null assertEquals(0,aggrStatsEmpty.getPartsFound()); assertNotNull(aggrStatsEmpty.getColStats()); assert(aggrStatsEmpty.getColStats().isEmpty()); // Test for only colNames being empty - AggrStats aggrStatsOnlyParts = client.getAggrColStatsFor(dbName,tblName,emptyColNames,partNames); + AggrStats aggrStatsOnlyParts = client.getAggrColStatsFor(dbName,tblName,emptyColNames,partNames, -1); assertNotNull(aggrStatsOnlyParts); // short-circuited on client-side, verifying that it's an empty object, not null assertEquals(0,aggrStatsOnlyParts.getPartsFound()); assertNotNull(aggrStatsOnlyParts.getColStats()); assert(aggrStatsOnlyParts.getColStats().isEmpty()); // Test for only partNames being empty - AggrStats aggrStatsOnlyCols = client.getAggrColStatsFor(dbName,tblName,colNames,emptyPartNames); + AggrStats aggrStatsOnlyCols = client.getAggrColStatsFor(dbName,tblName,colNames,emptyPartNames, -1); assertNotNull(aggrStatsOnlyCols); // short-circuited on client-side, verifying that it's an empty object, not null assertEquals(0,aggrStatsOnlyCols.getPartsFound()); assertNotNull(aggrStatsOnlyCols.getColStats()); assert(aggrStatsOnlyCols.getColStats().isEmpty()); // Test for valid values for both. - AggrStats aggrStatsFull = client.getAggrColStatsFor(dbName,tblName,colNames,partNames); + AggrStats aggrStatsFull = client.getAggrColStatsFor(dbName,tblName,colNames,partNames, -1); assertNotNull(aggrStatsFull); assertEquals(0,aggrStatsFull.getPartsFound()); // would still be empty, because no stats are actually populated. assertNotNull(aggrStatsFull.getColStats()); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggrStatsCacheIntegration.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggrStatsCacheIntegration.java index 51d96dd..f0fc0c7 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggrStatsCacheIntegration.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggrStatsCacheIntegration.java @@ -162,7 +162,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { }; AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1", "col2")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1", "col2"), -1); statChecker.checkStats(aggrStats); // Check that we had to build it from the stats @@ -173,7 +173,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { // Call again, this time it should come from memory. Also, reverse the name order this time // to assure that we still hit. 
aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1", "col2")); + Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1", "col2"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -183,7 +183,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { store.backdoor().getStatsCache().flushMemory(); // Call again, this time it should come from hbase aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1", "col2")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1", "col2"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(2, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -259,7 +259,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { }; AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); // Check that we had to build it from the stats @@ -270,7 +270,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { // Call again, this time it should come from memory. Also, reverse the name order this time // to assure that we still hit. aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -280,7 +280,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { store.backdoor().getStatsCache().flushMemory(); // Call again, this time it should come from hbase aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(1, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -352,7 +352,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { }; AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); // Check that we had to build it from the stats @@ -363,7 +363,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { // Call again, this time it should come from memory. Also, reverse the name order this time // to assure that we still hit. 
aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -372,7 +372,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { // Now call a different combination to get it in memory too aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -380,7 +380,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { Assert.assertEquals(2, store.backdoor().getStatsCache().misses.getCnt()); aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -392,7 +392,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { store.backdoor().getStatsCache().wakeInvalidator(); aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -438,7 +438,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { store.backdoor().getStatsCache().wakeInvalidator(); aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1"), -1); afterUpdate.checkStats(aggrStats); // Check that we missed, which means this aggregate was dropped from the cache. @@ -448,7 +448,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { // Check that our other aggregate is still in the cache. aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -461,7 +461,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { store.backdoor().getStatsCache().wakeInvalidator(); aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"), -1); new Checker() { @Override public void checkStats(AggrStats aggrStats) throws Exception { @@ -484,7 +484,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { // Check that our other aggregate is still in the cache. 
aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1"), -1); afterUpdate.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -549,9 +549,9 @@ public void alterInvalidation() throws Exception { } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=tomorrow"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=tomorrow"), Arrays.asList("col1"), -1); aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"), -1); // Check that we had to build it from the stats Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -570,7 +570,7 @@ public void alterInvalidation() throws Exception { store.backdoor().getStatsCache().wakeInvalidator(); aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1"), -1); // Check that we missed, which means this aggregate was dropped from the cache. Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -579,7 +579,7 @@ public void alterInvalidation() throws Exception { // Check that our other aggregate is still in the cache. aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"), -1); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); Assert.assertEquals(4, store.backdoor().getStatsCache().totalGets.getCnt()); @@ -643,9 +643,9 @@ public void altersInvalidation() throws Exception { } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=tomorrow"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=tomorrow"), Arrays.asList("col1"), -1); aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"), -1); // Check that we had to build it from the stats Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -668,7 +668,7 @@ public void altersInvalidation() throws Exception { store.backdoor().getStatsCache().wakeInvalidator(); aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=tomorrow", "ds=today"), Arrays.asList("col1"), -1); // Check that we missed, which means this aggregate was dropped from the cache. 
Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -677,7 +677,7 @@ public void altersInvalidation() throws Exception { // Check that our other aggregate got dropped too aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"), -1); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); Assert.assertEquals(4, store.backdoor().getStatsCache().totalGets.getCnt()); diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift index 4d92b73..4e094d4 100755 --- a/metastore/if/hive_metastore.thrift +++ b/metastore/if/hive_metastore.thrift @@ -544,7 +544,8 @@ struct PartitionsStatsRequest { 1: required string dbName, 2: required string tblName, 3: required list colNames, - 4: required list partNames + 4: required list partNames, + 5: optional i32 numPartitions=-1 } // Return type for add_partitions_req diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp index 79460a8..1bc82f0 100644 --- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp +++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp @@ -10093,6 +10093,11 @@ void PartitionsStatsRequest::__set_partNames(const std::vector & va this->partNames = val; } +void PartitionsStatsRequest::__set_numPartitions(const int32_t val) { + this->numPartitions = val; +__isset.numPartitions = true; +} + uint32_t PartitionsStatsRequest::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -10174,6 +10179,14 @@ uint32_t PartitionsStatsRequest::read(::apache::thrift::protocol::TProtocol* ipr xfer += iprot->skip(ftype); } break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->numPartitions); + this->__isset.numPartitions = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -10231,6 +10244,11 @@ uint32_t PartitionsStatsRequest::write(::apache::thrift::protocol::TProtocol* op } xfer += oprot->writeFieldEnd(); + if (this->__isset.numPartitions) { + xfer += oprot->writeFieldBegin("numPartitions", ::apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->numPartitions); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -10242,6 +10260,8 @@ void swap(PartitionsStatsRequest &a, PartitionsStatsRequest &b) { swap(a.tblName, b.tblName); swap(a.colNames, b.colNames); swap(a.partNames, b.partNames); + swap(a.numPartitions, b.numPartitions); + swap(a.__isset, b.__isset); } PartitionsStatsRequest::PartitionsStatsRequest(const PartitionsStatsRequest& other449) { @@ -10249,12 +10269,16 @@ PartitionsStatsRequest::PartitionsStatsRequest(const PartitionsStatsRequest& oth tblName = other449.tblName; colNames = other449.colNames; partNames = other449.partNames; + numPartitions = other449.numPartitions; + __isset = other449.__isset; } PartitionsStatsRequest& PartitionsStatsRequest::operator=(const PartitionsStatsRequest& other450) { dbName = other450.dbName; tblName = other450.tblName; colNames = other450.colNames; partNames = other450.partNames; + numPartitions = other450.numPartitions; + __isset = other450.__isset; return *this; } void PartitionsStatsRequest::printTo(std::ostream& out) const { @@ -10264,6 +10288,7 @@ void 
PartitionsStatsRequest::printTo(std::ostream& out) const { out << ", " << "tblName=" << to_string(tblName); out << ", " << "colNames=" << to_string(colNames); out << ", " << "partNames=" << to_string(partNames); + out << ", " << "numPartitions="; (__isset.numPartitions ? (out << to_string(numPartitions)) : (out << "")); out << ")"; } diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h index ec81798..17562fe 100644 --- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h +++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h @@ -4193,13 +4193,17 @@ inline std::ostream& operator<<(std::ostream& out, const TableStatsRequest& obj) return out; } +typedef struct _PartitionsStatsRequest__isset { + _PartitionsStatsRequest__isset() : numPartitions(true) {} + bool numPartitions :1; +} _PartitionsStatsRequest__isset; class PartitionsStatsRequest { public: PartitionsStatsRequest(const PartitionsStatsRequest&); PartitionsStatsRequest& operator=(const PartitionsStatsRequest&); - PartitionsStatsRequest() : dbName(), tblName() { + PartitionsStatsRequest() : dbName(), tblName(), numPartitions(-1) { } virtual ~PartitionsStatsRequest() throw(); @@ -4207,6 +4211,9 @@ class PartitionsStatsRequest { std::string tblName; std::vector colNames; std::vector partNames; + int32_t numPartitions; + + _PartitionsStatsRequest__isset __isset; void __set_dbName(const std::string& val); @@ -4216,6 +4223,8 @@ class PartitionsStatsRequest { void __set_partNames(const std::vector & val); + void __set_numPartitions(const int32_t val); + bool operator == (const PartitionsStatsRequest & rhs) const { if (!(dbName == rhs.dbName)) @@ -4226,6 +4235,10 @@ class PartitionsStatsRequest { return false; if (!(partNames == rhs.partNames)) return false; + if (__isset.numPartitions != rhs.__isset.numPartitions) + return false; + else if (__isset.numPartitions && !(numPartitions == rhs.numPartitions)) + return false; return true; } bool operator != (const PartitionsStatsRequest &rhs) const { diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/PartitionsStatsRequest.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/PartitionsStatsRequest.java index 65b8a54..0e612af 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/PartitionsStatsRequest.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/PartitionsStatsRequest.java @@ -42,6 +42,7 @@ private static final org.apache.thrift.protocol.TField TBL_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tblName", org.apache.thrift.protocol.TType.STRING, (short)2); private static final org.apache.thrift.protocol.TField COL_NAMES_FIELD_DESC = new org.apache.thrift.protocol.TField("colNames", org.apache.thrift.protocol.TType.LIST, (short)3); private static final org.apache.thrift.protocol.TField PART_NAMES_FIELD_DESC = new org.apache.thrift.protocol.TField("partNames", org.apache.thrift.protocol.TType.LIST, (short)4); + private static final org.apache.thrift.protocol.TField NUM_PARTITIONS_FIELD_DESC = new org.apache.thrift.protocol.TField("numPartitions", org.apache.thrift.protocol.TType.I32, (short)5); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -53,13 +54,15 @@ private String tblName; // required private List colNames; // required private List partNames; // required + private int numPartitions; // optional /** The set of fields 
this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { DB_NAME((short)1, "dbName"), TBL_NAME((short)2, "tblName"), COL_NAMES((short)3, "colNames"), - PART_NAMES((short)4, "partNames"); + PART_NAMES((short)4, "partNames"), + NUM_PARTITIONS((short)5, "numPartitions"); private static final Map byName = new HashMap(); @@ -82,6 +85,8 @@ public static _Fields findByThriftId(int fieldId) { return COL_NAMES; case 4: // PART_NAMES return PART_NAMES; + case 5: // NUM_PARTITIONS + return NUM_PARTITIONS; default: return null; } @@ -122,6 +127,9 @@ public String getFieldName() { } // isset id assignments + private static final int __NUMPARTITIONS_ISSET_ID = 0; + private byte __isset_bitfield = 0; + private static final _Fields optionals[] = {_Fields.NUM_PARTITIONS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -135,11 +143,15 @@ public String getFieldName() { tmpMap.put(_Fields.PART_NAMES, new org.apache.thrift.meta_data.FieldMetaData("partNames", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); + tmpMap.put(_Fields.NUM_PARTITIONS, new org.apache.thrift.meta_data.FieldMetaData("numPartitions", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(PartitionsStatsRequest.class, metaDataMap); } public PartitionsStatsRequest() { + this.numPartitions = -1; + } public PartitionsStatsRequest( @@ -159,6 +171,7 @@ public PartitionsStatsRequest( * Performs a deep copy on other. 
*/ public PartitionsStatsRequest(PartitionsStatsRequest other) { + __isset_bitfield = other.__isset_bitfield; if (other.isSetDbName()) { this.dbName = other.dbName; } @@ -173,6 +186,7 @@ public PartitionsStatsRequest(PartitionsStatsRequest other) { List __this__partNames = new ArrayList(other.partNames); this.partNames = __this__partNames; } + this.numPartitions = other.numPartitions; } public PartitionsStatsRequest deepCopy() { @@ -185,6 +199,8 @@ public void clear() { this.tblName = null; this.colNames = null; this.partNames = null; + this.numPartitions = -1; + } public String getDbName() { @@ -309,6 +325,28 @@ public void setPartNamesIsSet(boolean value) { } } + public int getNumPartitions() { + return this.numPartitions; + } + + public void setNumPartitions(int numPartitions) { + this.numPartitions = numPartitions; + setNumPartitionsIsSet(true); + } + + public void unsetNumPartitions() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __NUMPARTITIONS_ISSET_ID); + } + + /** Returns true if field numPartitions is set (has been assigned a value) and false otherwise */ + public boolean isSetNumPartitions() { + return EncodingUtils.testBit(__isset_bitfield, __NUMPARTITIONS_ISSET_ID); + } + + public void setNumPartitionsIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMPARTITIONS_ISSET_ID, value); + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case DB_NAME: @@ -343,6 +381,14 @@ public void setFieldValue(_Fields field, Object value) { } break; + case NUM_PARTITIONS: + if (value == null) { + unsetNumPartitions(); + } else { + setNumPartitions((Integer)value); + } + break; + } } @@ -360,6 +406,9 @@ public Object getFieldValue(_Fields field) { case PART_NAMES: return getPartNames(); + case NUM_PARTITIONS: + return getNumPartitions(); + } throw new IllegalStateException(); } @@ -379,6 +428,8 @@ public boolean isSet(_Fields field) { return isSetColNames(); case PART_NAMES: return isSetPartNames(); + case NUM_PARTITIONS: + return isSetNumPartitions(); } throw new IllegalStateException(); } @@ -432,6 +483,15 @@ public boolean equals(PartitionsStatsRequest that) { return false; } + boolean this_present_numPartitions = true && this.isSetNumPartitions(); + boolean that_present_numPartitions = true && that.isSetNumPartitions(); + if (this_present_numPartitions || that_present_numPartitions) { + if (!(this_present_numPartitions && that_present_numPartitions)) + return false; + if (this.numPartitions != that.numPartitions) + return false; + } + return true; } @@ -459,6 +519,11 @@ public int hashCode() { if (present_partNames) list.add(partNames); + boolean present_numPartitions = true && (isSetNumPartitions()); + list.add(present_numPartitions); + if (present_numPartitions) + list.add(numPartitions); + return list.hashCode(); } @@ -510,6 +575,16 @@ public int compareTo(PartitionsStatsRequest other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetNumPartitions()).compareTo(other.isSetNumPartitions()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetNumPartitions()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.numPartitions, other.numPartitions); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -561,6 +636,12 @@ public String toString() { sb.append(this.partNames); } first = false; + if (isSetNumPartitions()) { + if (!first) sb.append(", "); + sb.append("numPartitions:"); + sb.append(this.numPartitions); + first = false; + } 
sb.append(")"); return sb.toString(); } @@ -596,6 +677,8 @@ private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOExcept private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { try { + // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. + __isset_bitfield = 0; read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); } catch (org.apache.thrift.TException te) { throw new java.io.IOException(te); @@ -672,6 +755,14 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, PartitionsStatsRequ org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 5: // NUM_PARTITIONS + if (schemeField.type == org.apache.thrift.protocol.TType.I32) { + struct.numPartitions = iprot.readI32(); + struct.setNumPartitionsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -719,6 +810,11 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, PartitionsStatsReq } oprot.writeFieldEnd(); } + if (struct.isSetNumPartitions()) { + oprot.writeFieldBegin(NUM_PARTITIONS_FIELD_DESC); + oprot.writeI32(struct.numPartitions); + oprot.writeFieldEnd(); + } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -752,6 +848,14 @@ public void write(org.apache.thrift.protocol.TProtocol prot, PartitionsStatsRequ oprot.writeString(_iter405); } } + BitSet optionals = new BitSet(); + if (struct.isSetNumPartitions()) { + optionals.set(0); + } + oprot.writeBitSet(optionals, 1); + if (struct.isSetNumPartitions()) { + oprot.writeI32(struct.numPartitions); + } } @Override @@ -783,6 +887,11 @@ public void read(org.apache.thrift.protocol.TProtocol prot, PartitionsStatsReque } } struct.setPartNamesIsSet(true); + BitSet incoming = iprot.readBitSet(1); + if (incoming.get(0)) { + struct.numPartitions = iprot.readI32(); + struct.setNumPartitionsIsSet(true); + } } } diff --git a/metastore/src/gen/thrift/gen-php/metastore/Types.php b/metastore/src/gen/thrift/gen-php/metastore/Types.php index f505208..6cd63d2 100644 --- a/metastore/src/gen/thrift/gen-php/metastore/Types.php +++ b/metastore/src/gen/thrift/gen-php/metastore/Types.php @@ -10216,6 +10216,10 @@ class PartitionsStatsRequest { * @var string[] */ public $partNames = null; + /** + * @var int + */ + public $numPartitions = -1; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -10244,6 +10248,10 @@ class PartitionsStatsRequest { 'type' => TType::STRING, ), ), + 5 => array( + 'var' => 'numPartitions', + 'type' => TType::I32, + ), ); } if (is_array($vals)) { @@ -10259,6 +10267,9 @@ class PartitionsStatsRequest { if (isset($vals['partNames'])) { $this->partNames = $vals['partNames']; } + if (isset($vals['numPartitions'])) { + $this->numPartitions = $vals['numPartitions']; + } } } @@ -10329,6 +10340,13 @@ class PartitionsStatsRequest { $xfer += $input->skip($ftype); } break; + case 5: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->numPartitions); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -10386,6 +10404,11 @@ class PartitionsStatsRequest { } $xfer += $output->writeFieldEnd(); } + if ($this->numPartitions !== null) { + $xfer += $output->writeFieldBegin('numPartitions', TType::I32, 5); + $xfer += 
$output->writeI32($this->numPartitions); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; diff --git a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py index 8d88cd7..0e4fe54 100644 --- a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -7032,6 +7032,7 @@ class PartitionsStatsRequest: - tblName - colNames - partNames + - numPartitions """ thrift_spec = ( @@ -7040,13 +7041,15 @@ class PartitionsStatsRequest: (2, TType.STRING, 'tblName', None, None, ), # 2 (3, TType.LIST, 'colNames', (TType.STRING,None), None, ), # 3 (4, TType.LIST, 'partNames', (TType.STRING,None), None, ), # 4 + (5, TType.I32, 'numPartitions', None, -1, ), # 5 ) - def __init__(self, dbName=None, tblName=None, colNames=None, partNames=None,): + def __init__(self, dbName=None, tblName=None, colNames=None, partNames=None, numPartitions=thrift_spec[5][4],): self.dbName = dbName self.tblName = tblName self.colNames = colNames self.partNames = partNames + self.numPartitions = numPartitions def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -7087,6 +7090,11 @@ def read(self, iprot): iprot.readListEnd() else: iprot.skip(ftype) + elif fid == 5: + if ftype == TType.I32: + self.numPartitions = iprot.readI32() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -7119,6 +7127,10 @@ def write(self, oprot): oprot.writeString(iter363) oprot.writeListEnd() oprot.writeFieldEnd() + if self.numPartitions is not None: + oprot.writeFieldBegin('numPartitions', TType.I32, 5) + oprot.writeI32(self.numPartitions) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -7140,6 +7152,7 @@ def __hash__(self): value = (value * 31) ^ hash(self.tblName) value = (value * 31) ^ hash(self.colNames) value = (value * 31) ^ hash(self.partNames) + value = (value * 31) ^ hash(self.numPartitions) return value def __repr__(self): diff --git a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb index 0964cd8..227bccb 100644 --- a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -1566,12 +1566,14 @@ class PartitionsStatsRequest TBLNAME = 2 COLNAMES = 3 PARTNAMES = 4 + NUMPARTITIONS = 5 FIELDS = { DBNAME => {:type => ::Thrift::Types::STRING, :name => 'dbName'}, TBLNAME => {:type => ::Thrift::Types::STRING, :name => 'tblName'}, COLNAMES => {:type => ::Thrift::Types::LIST, :name => 'colNames', :element => {:type => ::Thrift::Types::STRING}}, - PARTNAMES => {:type => ::Thrift::Types::LIST, :name => 'partNames', :element => {:type => ::Thrift::Types::STRING}} + PARTNAMES => {:type => ::Thrift::Types::LIST, :name => 'partNames', :element => {:type => ::Thrift::Types::STRING}}, + NUMPARTITIONS => {:type => ::Thrift::Types::I32, :name => 'numPartitions', :default => -1, :optional => true} } def struct_fields; FIELDS; end diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/AggregateStatsCache.java b/metastore/src/java/org/apache/hadoop/hive/metastore/AggregateStatsCache.java index 4ab178c..2ea556e 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/AggregateStatsCache.java +++ 
b/metastore/src/java/org/apache/hadoop/hive/metastore/AggregateStatsCache.java @@ -240,25 +240,27 @@ private AggrColStats findBestMatch(List partNames, List ca } // We'll count misses as we iterate int maxMisses = (int) maxVariance * numPartsRequested; - for (String partName : partNames) { - for (Iterator> iterator = candidateMatchStats.entrySet().iterator(); iterator.hasNext();) { - Map.Entry entry = iterator.next(); - AggrColStats candidate = entry.getKey(); - matchStats = entry.getValue(); - if (candidate.getBloomFilter().test(partName.getBytes())) { - ++matchStats.hits; - } else { - ++matchStats.misses; - } - // 2nd pass at removing invalid candidates - // If misses so far exceed max tolerable misses - if (matchStats.misses > maxMisses) { - iterator.remove(); - continue; - } - // Check if this is the best match so far - if (matchStats.hits > bestMatchHits) { - bestMatch = candidate; + if (partNames != null) { + for (String partName : partNames) { + for (Iterator> iterator = candidateMatchStats.entrySet().iterator(); iterator.hasNext();) { + Map.Entry entry = iterator.next(); + AggrColStats candidate = entry.getKey(); + matchStats = entry.getValue(); + if (candidate.getBloomFilter().test(partName.getBytes())) { + ++matchStats.hits; + } else { + ++matchStats.misses; + } + // 2nd pass at removing invalid candidates + // If misses so far exceed max tolerable misses + if (matchStats.misses > maxMisses) { + iterator.remove(); + continue; + } + // Check if this is the best match so far + if (matchStats.hits > bestMatchHits) { + bestMatch = candidate; + } } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 38c0eed..899109f 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -6099,21 +6099,25 @@ public AggrStats get_aggr_stats_for(PartitionsStatsRequest request) throws NoSuchObjectException, MetaException, TException { String dbName = request.getDbName().toLowerCase(); String tblName = request.getTblName().toLowerCase(); + int numPartitions = request.getNumPartitions(); startFunction("get_aggr_stats_for: db=" + request.getDbName() + " table=" + request.getTblName()); List lowerCaseColNames = new ArrayList(request.getColNames().size()); for (String colName : request.getColNames()) { lowerCaseColNames.add(colName.toLowerCase()); } - List lowerCasePartNames = new ArrayList(request.getPartNames().size()); - for (String partName : request.getPartNames()) { - lowerCasePartNames.add(lowerCaseConvertPartName(partName)); + List lowerCasePartNames = null; + if (request.getPartNames() != null) { + lowerCasePartNames = new ArrayList(request.getPartNames().size()); + for (String partName : request.getPartNames()) { + lowerCasePartNames.add(lowerCaseConvertPartName(partName)); + } } AggrStats aggrStats = null; try { aggrStats = new AggrStats(getMS().get_aggr_stats_for(dbName, tblName, lowerCasePartNames, - lowerCaseColNames)); + lowerCaseColNames, numPartitions)); return aggrStats; } finally { endFunction("get_partitions_statistics_req: ", aggrStats == null, null, request.getTblName()); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 909d8eb..2e50364 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ 
b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -2344,12 +2344,15 @@ protected void drop_table_with_environment_context(String dbname, String name, @Override public AggrStats getAggrColStatsFor(String dbName, String tblName, - List colNames, List partNames) throws NoSuchObjectException, MetaException, TException { - if (colNames.isEmpty() || partNames.isEmpty()) { + List colNames, List partNames, int numPartitions) throws NoSuchObjectException, MetaException, TException { + if (colNames.isEmpty() || (partNames != null && partNames.isEmpty())) { LOG.debug("Columns is empty or partNames is empty : Short-circuiting stats eval on client side."); return new AggrStats(new ArrayList(),0); // Nothing to aggregate } PartitionsStatsRequest req = new PartitionsStatsRequest(dbName, tblName, colNames, partNames); + if (numPartitions != -1) { + req.setNumPartitions(numPartitions); + } return client.get_aggr_stats_for(req); } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index b6fe502..541fabf 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -1564,7 +1564,7 @@ GetRoleGrantsForPrincipalResponse get_role_grants_for_principal( GetRoleGrantsForPrincipalRequest getRolePrincReq) throws MetaException, TException; public AggrStats getAggrColStatsFor(String dbName, String tblName, - List colNames, List partName) throws NoSuchObjectException, MetaException, TException; + List colNames, List partName, int numPartitions) throws NoSuchObjectException, MetaException, TException; boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index 9c900af..2e0d180 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -1195,9 +1195,10 @@ public ColumnStatistics getTableStats(final String dbName, final String tableNam } public AggrStats aggrColStatsForPartitions(String dbName, String tableName, - List partNames, List colNames, boolean useDensityFunctionForNDVEstimation) + List partNames, List colNames, boolean useDensityFunctionForNDVEstimation, int numPartitions) throws MetaException { - if (colNames.isEmpty() || partNames.isEmpty()) { + // partNames = null means all partitions should be considered while aggregating column stats. + if (colNames.isEmpty() || (partNames != null && partNames.isEmpty())) { LOG.debug("Columns is empty or partNames is empty : Short-circuiting stats eval"); return new AggrStats(new ArrayList(), 0); // Nothing to aggregate } @@ -1209,17 +1210,17 @@ public AggrStats aggrColStatsForPartitions(String dbName, String tableName, List colStatsAggrFromDB; int maxPartsPerCacheNode = aggrStatsCache.getMaxPartsPerCacheNode(); float fpp = aggrStatsCache.getFalsePositiveProbability(); - int partitionsRequested = partNames.size(); + int partitionsRequested = partNames == null ? 
numPartitions : partNames.size(); if (partitionsRequested > maxPartsPerCacheNode) { colStatsList = columnStatisticsObjForPartitions(dbName, tableName, partNames, colNames, - partsFound, useDensityFunctionForNDVEstimation); + partsFound, useDensityFunctionForNDVEstimation, numPartitions); } else { colStatsList = new ArrayList(); // Bloom filter for the new node that we will eventually add to the cache BloomFilter bloomFilter = createPartsBloomFilter(maxPartsPerCacheNode, fpp, partNames); for (String colName : colNames) { // Check the cache first - colStatsAggrCached = aggrStatsCache.get(dbName, tableName, colName, partNames); + colStatsAggrCached = partNames == null ? null : aggrStatsCache.get(dbName, tableName, colName, partNames); if (colStatsAggrCached != null) { colStatsList.add(colStatsAggrCached.getColStats()); } else { @@ -1228,7 +1229,7 @@ public AggrStats aggrColStatsForPartitions(String dbName, String tableName, // Read aggregated stats for one column colStatsAggrFromDB = columnStatisticsObjForPartitions(dbName, tableName, partNames, colNamesForDB, - partsFound, useDensityFunctionForNDVEstimation); + partsFound, useDensityFunctionForNDVEstimation, numPartitions); if (!colStatsAggrFromDB.isEmpty()) { ColumnStatisticsObj colStatsAggr = colStatsAggrFromDB.get(0); colStatsList.add(colStatsAggr); @@ -1241,7 +1242,7 @@ public AggrStats aggrColStatsForPartitions(String dbName, String tableName, } else { colStatsList = columnStatisticsObjForPartitions(dbName, tableName, partNames, colNames, partsFound, - useDensityFunctionForNDVEstimation); + useDensityFunctionForNDVEstimation, numPartitions); } LOG.info("useDensityFunctionForNDVEstimation = " + useDensityFunctionForNDVEstimation + "\npartsFound = " + partsFound + "\nColumnStatisticsObj = " @@ -1252,27 +1253,31 @@ public AggrStats aggrColStatsForPartitions(String dbName, String tableName, private BloomFilter createPartsBloomFilter(int maxPartsPerCacheNode, float fpp, List partNames) { BloomFilter bloomFilter = new BloomFilter(maxPartsPerCacheNode, fpp); - for (String partName : partNames) { - bloomFilter.add(partName.getBytes()); + if (partNames != null) { + for (String partName : partNames) { + bloomFilter.add(partName.getBytes()); + } } return bloomFilter; } private long partsFoundForPartitions(final String dbName, final String tableName, final List partNames, List colNames) throws MetaException { - assert !colNames.isEmpty() && !partNames.isEmpty(); + assert !colNames.isEmpty() && (partNames == null || !partNames.isEmpty()); final boolean doTrace = LOG.isDebugEnabled(); final String queryText0 = "select count(\"COLUMN_NAME\") from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " - + " and \"COLUMN_NAME\" in (%1$s) and \"PARTITION_NAME\" in (%2$s)" + + " and \"COLUMN_NAME\" in (%1$s) " + + (partNames == null ? "" : " and \"PARTITION_NAME\" in (%2$s)") + " group by \"PARTITION_NAME\""; List allCounts = runBatched(colNames, new Batchable() { public List run(final List inputColName) throws MetaException { return runBatched(partNames, new Batchable() { public List run(List inputPartNames) throws MetaException { long partsFound = 0; - String queryText = String.format(queryText0, - makeParams(inputColName.size()), makeParams(inputPartNames.size())); + String queryText = inputPartNames != null ? (String.format(queryText0, + makeParams(inputColName.size()), makeParams(inputPartNames.size()))) : + (String.format(queryText0, makeParams(inputColName.size()))); long start = doTrace ? 
System.nanoTime() : 0; Query query = pm.newQuery("javax.jdo.query.SQL", queryText); try { @@ -1304,8 +1309,8 @@ private long partsFoundForPartitions(final String dbName, final String tableName private List columnStatisticsObjForPartitions(final String dbName, final String tableName, final List partNames, List colNames, long partsFound, - final boolean useDensityFunctionForNDVEstimation) throws MetaException { - final boolean areAllPartsFound = (partsFound == partNames.size()); + final boolean useDensityFunctionForNDVEstimation, int numPartitions) throws MetaException { + final boolean areAllPartsFound = partsFound == (partNames == null ? numPartitions : partNames.size()); return runBatched(colNames, new Batchable() { public List run(final List inputColNames) throws MetaException { return runBatched(partNames, new Batchable() { @@ -1358,7 +1363,7 @@ private long partsFoundForPartitions(final String dbName, final String tableName // Extrapolation is not needed. if (areAllPartsFound) { queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + + (partNames == null ? "" : " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")") + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; start = doTrace ? System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); @@ -1386,7 +1391,7 @@ private long partsFoundForPartitions(final String dbName, final String tableName queryText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") " + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + + (partNames == null ? "" : (" and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")")) + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; start = doTrace ? System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); @@ -1420,8 +1425,9 @@ private long partsFoundForPartitions(final String dbName, final String tableName // Extrapolation is not needed for columns noExtraColumnNames if (noExtraColumnNames.size() != 0) { queryText = commonPrefix + " and \"COLUMN_NAME\" in (" - + makeParams(noExtraColumnNames.size()) + ")" + " and \"PARTITION_NAME\" in (" - + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; + + makeParams(noExtraColumnNames.size()) + ")" + + (partNames == null ? "" : (" and \"PARTITION_NAME\" in (" + + makeParams(partNames.size()) + ")")) + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\""; start = doTrace ? System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); qResult = executeWithArray(query, @@ -1450,9 +1456,9 @@ private long partsFoundForPartitions(final String dbName, final String tableName Map> sumMap = new HashMap>(); queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), sum(\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\" where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " - + " and \"COLUMN_NAME\" in (" + makeParams(extraColumnNameTypeParts.size()) - + ") and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) - + ") group by \"COLUMN_NAME\""; + + " and \"COLUMN_NAME\" in (" + makeParams(extraColumnNameTypeParts.size()) + ") " + + (partNames == null ? "" : (" and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ") ")) + + " group by \"COLUMN_NAME\""; start = doTrace ? 
System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); List extraColumnNames = new ArrayList(); @@ -1523,13 +1529,13 @@ private long partsFoundForPartitions(final String dbName, final String tableName queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + + (partNames == null ? "" : (" and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")")) + " order by \"" + colStatName + "\""; } else { queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" - + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + + (partNames == null ? "" : (" and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")")) + " order by cast(\"" + colStatName + "\" as decimal)"; } start = doTrace ? System.nanoTime() : 0; @@ -1559,8 +1565,9 @@ private long partsFoundForPartitions(final String dbName, final String tableName + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" - + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" - + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\""; + + " and \"COLUMN_NAME\" = ?" + + (partNames == null ? "" : (" and \"PARTITION_NAME\" in (" + + makeParams(partNames.size()) + ")")) + " group by \"COLUMN_NAME\""; start = doTrace ? System.nanoTime() : 0; query = pm.newQuery("javax.jdo.query.SQL", queryText); qResult = executeWithArray(query, @@ -1612,16 +1619,17 @@ private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(Object[] row, int i, private Object[] prepareParams(String dbName, String tableName, List partNames, List colNames) throws MetaException { - Object[] params = new Object[colNames.size() + partNames.size() + 2]; + Object[] params = new Object[colNames.size() + (partNames == null ? 
0 : partNames.size()) + 2]; int paramI = 0; params[paramI++] = dbName; params[paramI++] = tableName; for (String colName : colNames) { params[paramI++] = colName; } - for (String partName : partNames) { - params[paramI++] = partName; - } + if (partNames != null) + for (String partName : partNames) { + params[paramI++] = partName; + } return params; } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 5adfa02..4d7ca32 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -7253,14 +7253,14 @@ protected ColumnStatistics getJdoResult( @Override public AggrStats get_aggr_stats_for(String dbName, String tblName, - final List partNames, final List colNames) throws MetaException, NoSuchObjectException { + final List partNames, final List colNames, final int numPartitions) throws MetaException, NoSuchObjectException { final boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(getConf(), HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION); return new GetHelper(dbName, tblName, true, false) { @Override protected AggrStats getSqlResult(GetHelper ctx) throws MetaException { return directSql.aggrColStatsForPartitions(dbName, tblName, partNames, - colNames, useDensityFunctionForNDVEstimation); + colNames, useDensityFunctionForNDVEstimation, numPartitions); } @Override protected AggrStats getJdoResult(GetHelper ctx) diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java index bbd47b8..8b66ac6 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -571,7 +571,7 @@ public void dropFunction(String dbName, String funcName) public List getFunctions(String dbName, String pattern) throws MetaException; public AggrStats get_aggr_stats_for(String dbName, String tblName, - List partNames, List colNames) throws MetaException, NoSuchObjectException; + List partNames, List colNames, int numPartitions) throws MetaException, NoSuchObjectException; /** * Get the next notification event. 
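Note on the convention the MetaStoreDirectSql and RawStore changes above rely on: a null partNames list now means "aggregate over every partition of the table", so the PARTITION_NAME predicate is simply left out of the generated SQL and the caller supplies numPartitions instead of a name list. The standalone Java sketch below only illustrates that pattern; the class name and the buildPartColStatsQuery/makeParams helpers are hypothetical and not part of this patch.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class PartColStatsQuerySketch {

  // Builds the "?,?,...,?" placeholder list for an IN clause.
  static String makeParams(int n) {
    return String.join(",", Collections.nCopies(n, "?"));
  }

  // Hypothetical helper mirroring the query-building pattern used in the
  // MetaStoreDirectSql hunks above; not the actual Hive code.
  static String buildPartColStatsQuery(List<String> colNames, List<String> partNames) {
    StringBuilder sql = new StringBuilder(
        "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\")"
        + " from \"PART_COL_STATS\""
        + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?"
        + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")");
    if (partNames != null) {
      // Explicit partition list: restrict the aggregation to the requested partitions.
      sql.append(" and \"PARTITION_NAME\" in (").append(makeParams(partNames.size())).append(")");
    }
    // partNames == null: no partition predicate, i.e. aggregate over all partitions.
    sql.append(" group by \"COLUMN_NAME\", \"COLUMN_TYPE\"");
    return sql.toString();
  }

  public static void main(String[] args) {
    System.out.println(buildPartColStatsQuery(Arrays.asList("col1"), null));
    System.out.println(buildPartColStatsQuery(Arrays.asList("col1"),
        Arrays.asList("ds=today", "ds=yesterday")));
  }
}

Because a null partNames list covers the whole table, callers that already know they need all partitions can avoid shipping a potentially very large name list and pass only the partition count, which is what the HiveMetaStoreClient and StatsUtils changes in this patch do.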
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java index c65c7a4..913c2a8 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java @@ -1926,11 +1926,7 @@ public boolean deleteTableColumnStatistics(String dbName, String tableName, Stri */ @Override public AggrStats get_aggr_stats_for(String dbName, String tblName, List partNames, - List colNames) throws MetaException, NoSuchObjectException { - List> partVals = new ArrayList>(partNames.size()); - for (String partName : partNames) { - partVals.add(partNameToVals(partName)); - } + List colNames, int numPartitions) throws MetaException, NoSuchObjectException { boolean commit = false; boolean hasAnyStats = false; openTransaction(); diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java index 1ea72a0..3f485e9 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java @@ -753,7 +753,7 @@ public Function getFunction(String dbName, String funcName) @Override public AggrStats get_aggr_stats_for(String dbName, - String tblName, List partNames, List colNames) + String tblName, List partNames, List colNames, int numPartitions) throws MetaException { return null; } diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java index 3e6acc7..d8bfa47 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java @@ -770,7 +770,7 @@ public Function getFunction(String dbName, String funcName) @Override public AggrStats get_aggr_stats_for(String dbName, - String tblName, List partNames, List colNames) + String tblName, List partNames, List colNames, int numPartitions) throws MetaException { return null; } diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCache.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCache.java index 6cd3a46..4eff633 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCache.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCache.java @@ -137,7 +137,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { }; AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); // Check that we had to build it from the stats @@ -148,7 +148,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { // Call again, this time it should come from memory. Also, reverse the name order this time // to assure that we still hit.
aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -192,7 +192,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { }; AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); } @@ -258,7 +258,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { }; AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); // Check that we had to build it from the stats @@ -269,7 +269,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { // Call again, this time it should come from memory. Also, reverse the name order this time // to assure that we still hit. aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); @@ -304,7 +304,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { }; AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); // Check that we had to build it from the stats diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java index e0c4094..b3b9beb 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsCacheWithBitVector.java @@ -162,7 +162,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { }; AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1")); + Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); // Check that we had to build it from the stats @@ -174,7 +174,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { // order this time // to assure that we still hit. 
aggrStats = store.get_aggr_stats_for(dbName, tableName, - Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1")); + Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt()); diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java index f4e55ed..ef5ba8b 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java @@ -144,7 +144,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { partNames.add("ds=" + i); } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, - Arrays.asList("col1")); + Arrays.asList("col1"), -1); statChecker.checkStats(aggrStats); } @@ -214,7 +214,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { partNames.add("ds=" + i); } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, - Arrays.asList("col1_decimal")); + Arrays.asList("col1_decimal"), 10); statChecker.checkStats(aggrStats); } @@ -284,7 +284,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { partNames.add("ds=" + i); } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, - Arrays.asList("col1_double")); + Arrays.asList("col1_double"), -1); statChecker.checkStats(aggrStats); } @@ -354,7 +354,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { partNames.add("ds=" + i); } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, - Arrays.asList("col1_string")); + Arrays.asList("col1_string"), -1); statChecker.checkStats(aggrStats); } @@ -423,7 +423,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { partNames.add("ds=" + i); } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, - Arrays.asList("col2")); + Arrays.asList("col2"), -1); statChecker.checkStats(aggrStats); } @@ -495,7 +495,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { partNames.add("ds=" + i); } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, - Arrays.asList("col3")); + Arrays.asList("col3"), -1); statChecker.checkStats(aggrStats); } @@ -567,7 +567,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { partNames.add("ds=" + i); } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, - Arrays.asList("col4")); + Arrays.asList("col4"), -1); statChecker.checkStats(aggrStats); } @@ -639,7 +639,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { partNames.add("ds=" + i); } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, - Arrays.asList("col5")); + Arrays.asList("col5"), -1); statChecker.checkStats(aggrStats); } @@ -711,7 +711,7 @@ public void checkStats(AggrStats aggrStats) throws Exception { partNames.add("ds=" + i); } AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, - Arrays.asList("col5_double")); + Arrays.asList("col5_double"), -1); statChecker.checkStats(aggrStats); } } diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java 
index 62918be..930e954 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java
@@ -146,7 +146,7 @@ public void checkStats(AggrStats aggrStats) throws Exception {
      partNames.add("ds=" + i);
    }
    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
-        Arrays.asList("col1"));
+        Arrays.asList("col1"), -1);
    statChecker.checkStats(aggrStats);
  }
@@ -215,7 +215,7 @@ public void checkStats(AggrStats aggrStats) throws Exception {
      partNames.add("ds=" + i);
    }
    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
-        Arrays.asList("col2"));
+        Arrays.asList("col2"), -1);
    statChecker.checkStats(aggrStats);
  }
@@ -287,7 +287,7 @@ public void checkStats(AggrStats aggrStats) throws Exception {
      partNames.add("ds=" + i);
    }
    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
-        Arrays.asList("col3"));
+        Arrays.asList("col3"), -1);
    statChecker.checkStats(aggrStats);
  }
@@ -359,7 +359,7 @@ public void checkStats(AggrStats aggrStats) throws Exception {
      partNames.add("ds=" + i);
    }
    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
-        Arrays.asList("col4"));
+        Arrays.asList("col4"), -1);
    statChecker.checkStats(aggrStats);
  }
@@ -431,7 +431,7 @@ public void checkStats(AggrStats aggrStats) throws Exception {
      partNames.add("ds=" + i);
    }
    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
-        Arrays.asList("col5_long"));
+        Arrays.asList("col5_long"), -1);
    statChecker.checkStats(aggrStats);
  }
@@ -503,7 +503,7 @@ public void checkStats(AggrStats aggrStats) throws Exception {
      partNames.add("ds=" + i);
    }
    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
-        Arrays.asList("col5_decimal"));
+        Arrays.asList("col5_decimal"), -1);
    statChecker.checkStats(aggrStats);
  }
@@ -575,7 +575,7 @@ public void checkStats(AggrStats aggrStats) throws Exception {
      partNames.add("ds=" + i);
    }
    AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames,
-        Arrays.asList("col5_double"));
+        Arrays.asList("col5_double"), -1);
    statChecker.checkStats(aggrStats);
  }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 10f9cc7..6e4650f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -3484,9 +3484,9 @@ public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) t
  }

  public AggrStats getAggrColStatsFor(String dbName, String tblName,
-      List<String> colNames, List<String> partName) {
+      List<String> colNames, List<String> partName, int numPartitions) {
    try {
-      return getMSC().getAggrColStatsFor(dbName, tblName, colNames, partName);
+      return getMSC().getAggrColStatsFor(dbName, tblName, colNames, partName, numPartitions);
    } catch (Exception e) {
      LOG.debug(StringUtils.stringifyException(e));
      return new AggrStats(new ArrayList(),0);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index 26e936e..69a2ac0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -237,7 +237,7 @@ private static PrunedPartitionList getAllPartsFromCacheOrServer(Table tab, Strin
    } catch (HiveException e) {
      throw new SemanticException(e);
    }
-    ppList = new PrunedPartitionList(tab, parts, null, unknownPartitions);
+    ppList = new PrunedPartitionList(tab, parts, null, unknownPartitions, true);
    if (partsCache != null) {
      partsCache.put(key, ppList);
    }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
index da2e1e2..c072cbf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
@@ -42,6 +42,9 @@
  /** Whether there are partitions in the list that may or may not satisfy the criteria. */
  private boolean hasUnknowns;

+  /** Whether this list covers all partitions of the table; may have false negatives. */
+  private boolean hasAllPartitions;
+
  public PrunedPartitionList(Table source, Set<Partition> partitions, List<String> referred,
      boolean hasUnknowns) {
    this.source = source;
@@ -50,6 +53,19 @@ public PrunedPartitionList(Table source, Set partitions, List
    this.hasUnknowns = hasUnknowns;
  }

+  public PrunedPartitionList(Table source, Set<Partition> partitions, List<String> referred,
+      boolean hasUnknowns, boolean hasAllPartitions) {
+    this.source = source;
+    this.referred = referred;
+    this.partitions = partitions;
+    this.hasUnknowns = hasUnknowns;
+    this.hasAllPartitions = hasAllPartitions;
+  }
+
+  public boolean getHasAllPartitions() {
+    return hasAllPartitions;
+  }
+
  public Table getSourceTable() {
    return source;
  }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index d8acf94..6df07fd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -260,18 +260,22 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa
      stats.setBasicStatsState(State.PARTIAL);
    }
    if (fetchColStats) {
-      List<String> partNames = new ArrayList<String>(partList.getNotDeniedPartns().size());
-      for (Partition part : partList.getNotDeniedPartns()) {
-        partNames.add(part.getName());
+      List<String> partNames = null;
+      List<Partition> notDeniedPartitions = partList.getNotDeniedPartns();
+      if (!partList.getHasAllPartitions()) {
+        partNames = new ArrayList<String>(notDeniedPartitions.size());
+        for (Partition part : notDeniedPartitions) {
+          partNames.add(part.getName());
+        }
      }
      neededColumns = processNeededColumns(schema, neededColumns);
      AggrStats aggrStats = null;
      // We check the sizes of neededColumns and partNames here. If either
      // size is 0, aggrStats is null after several retries. Thus, we can
      // skip the step to connect to the metastore.
-      if (neededColumns.size() > 0 && partNames.size() > 0) {
+      if (neededColumns.size() > 0 && (partNames == null || partNames.size() > 0)) {
        aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
-            neededColumns, partNames);
+            neededColumns, partNames, (partNames == null ? notDeniedPartitions.size() : -1));
      }
      if (null == aggrStats || null == aggrStats.getColStats()
          || aggrStats.getColStatsSize() == 0) {
@@ -304,7 +308,8 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa
      stats.addToColumnStats(columnStats);
      State colState = deriveStatType(columnStats, referencedColumns);
-      if (aggrStats.getPartsFound() != partNames.size() && colState != State.NONE) {
+      // If partNames is null, the HMS is expected to return stats for all partitions.
+      if (aggrStats.getPartsFound() != notDeniedPartitions.size() && colState != State.NONE) {
        LOG.debug("Column stats requested for : " + partNames.size() + " partitions. "
            + "Able to retrieve for " + aggrStats.getPartsFound() + " partitions");
        colState = State.PARTIAL;
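
For reviewers, a rough sketch of the calling convention this patch introduces, distilled from the StatsUtils hunk above. The helper class and method below are hypothetical (not part of the patch); only the signatures of getAggrColStatsFor, getNotDeniedPartns and getHasAllPartitions come from the diff. When the pruned list is known to cover every partition, the caller passes partNames == null together with the partition count; otherwise it passes explicit names and numPartitions == -1.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;

// Hypothetical helper illustrating how the new numPartitions argument is meant to be used.
public class AggrStatsFetchSketch {
  static AggrStats fetchAggrStats(Table table, PrunedPartitionList partList,
      List<String> neededColumns) throws Exception {
    List<Partition> notDenied = partList.getNotDeniedPartns();
    List<String> partNames = null;
    if (!partList.getHasAllPartitions()) {
      // Only a subset of partitions is involved: name them explicitly and pass -1.
      partNames = new ArrayList<String>(notDenied.size());
      for (Partition p : notDenied) {
        partNames.add(p.getName());
      }
    }
    // partNames == null signals "all partitions"; the count lets the server cross-check
    // how many partitions the aggregation is expected to cover.
    int numPartitions = (partNames == null) ? notDenied.size() : -1;
    return Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
        neededColumns, partNames, numPartitions);
  }
}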