diff --git metastore/if/hive_metastore.thrift metastore/if/hive_metastore.thrift index eef1b80..cc802c6 100755 --- metastore/if/hive_metastore.thrift +++ metastore/if/hive_metastore.thrift @@ -264,15 +264,15 @@ struct BooleanColumnStatsData { } struct DoubleColumnStatsData { -1: required double lowValue, -2: required double highValue, +1: optional double lowValue, +2: optional double highValue, 3: required i64 numNulls, 4: required i64 numDVs } struct LongColumnStatsData { -1: required i64 lowValue, -2: required i64 highValue, +1: optional i64 lowValue, +2: optional i64 highValue, 3: required i64 numNulls, 4: required i64 numDVs } @@ -297,8 +297,8 @@ struct Decimal { } struct DecimalColumnStatsData { -1: required Decimal lowValue, -2: required Decimal highValue, +1: optional Decimal lowValue, +2: optional Decimal highValue, 3: required i64 numNulls, 4: required i64 numDVs } diff --git metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp index 9e440bb..8014f2a 100644 --- metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp +++ metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp @@ -3215,8 +3215,8 @@ void swap(BooleanColumnStatsData &a, BooleanColumnStatsData &b) { swap(a.numNulls, b.numNulls); } -const char* DoubleColumnStatsData::ascii_fingerprint = "97F958CB0709C3B109A57EEE01946C13"; -const uint8_t DoubleColumnStatsData::binary_fingerprint[16] = {0x97,0xF9,0x58,0xCB,0x07,0x09,0xC3,0xB1,0x09,0xA5,0x7E,0xEE,0x01,0x94,0x6C,0x13}; +const char* DoubleColumnStatsData::ascii_fingerprint = "DA7C011321D74C48396AA002E61A0CBB"; +const uint8_t DoubleColumnStatsData::binary_fingerprint[16] = {0xDA,0x7C,0x01,0x13,0x21,0xD7,0x4C,0x48,0x39,0x6A,0xA0,0x02,0xE6,0x1A,0x0C,0xBB}; uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -3229,8 +3229,6 @@ uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro using ::apache::thrift::protocol::TProtocolException; - bool isset_lowValue = false; - bool isset_highValue = false; bool isset_numNulls = false; bool isset_numDVs = false; @@ -3245,7 +3243,7 @@ uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro case 1: if (ftype == ::apache::thrift::protocol::T_DOUBLE) { xfer += iprot->readDouble(this->lowValue); - isset_lowValue = true; + this->__isset.lowValue = true; } else { xfer += iprot->skip(ftype); } @@ -3253,7 +3251,7 @@ uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro case 2: if (ftype == ::apache::thrift::protocol::T_DOUBLE) { xfer += iprot->readDouble(this->highValue); - isset_highValue = true; + this->__isset.highValue = true; } else { xfer += iprot->skip(ftype); } @@ -3283,10 +3281,6 @@ uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro xfer += iprot->readStructEnd(); - if (!isset_lowValue) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_highValue) - throw TProtocolException(TProtocolException::INVALID_DATA); if (!isset_numNulls) throw TProtocolException(TProtocolException::INVALID_DATA); if (!isset_numDVs) @@ -3298,14 +3292,16 @@ uint32_t DoubleColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr uint32_t xfer = 0; xfer += oprot->writeStructBegin("DoubleColumnStatsData"); - xfer += oprot->writeFieldBegin("lowValue", ::apache::thrift::protocol::T_DOUBLE, 1); - xfer += oprot->writeDouble(this->lowValue); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("highValue", ::apache::thrift::protocol::T_DOUBLE, 2); - xfer += oprot->writeDouble(this->highValue); - xfer += oprot->writeFieldEnd(); - + if (this->__isset.lowValue) { + xfer += oprot->writeFieldBegin("lowValue", ::apache::thrift::protocol::T_DOUBLE, 1); + xfer += oprot->writeDouble(this->lowValue); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.highValue) { + xfer += oprot->writeFieldBegin("highValue", ::apache::thrift::protocol::T_DOUBLE, 2); + xfer += oprot->writeDouble(this->highValue); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldBegin("numNulls", ::apache::thrift::protocol::T_I64, 3); xfer += oprot->writeI64(this->numNulls); xfer += oprot->writeFieldEnd(); @@ -3325,10 +3321,11 @@ void swap(DoubleColumnStatsData &a, DoubleColumnStatsData &b) { swap(a.highValue, b.highValue); swap(a.numNulls, b.numNulls); swap(a.numDVs, b.numDVs); + swap(a.__isset, b.__isset); } -const char* LongColumnStatsData::ascii_fingerprint = "66C8E6F97F0275919D86FEB536251B17"; -const uint8_t LongColumnStatsData::binary_fingerprint[16] = {0x66,0xC8,0xE6,0xF9,0x7F,0x02,0x75,0x91,0x9D,0x86,0xFE,0xB5,0x36,0x25,0x1B,0x17}; +const char* LongColumnStatsData::ascii_fingerprint = "E685FC220B24E3B8B93604790DCB9AEA"; +const uint8_t LongColumnStatsData::binary_fingerprint[16] = {0xE6,0x85,0xFC,0x22,0x0B,0x24,0xE3,0xB8,0xB9,0x36,0x04,0x79,0x0D,0xCB,0x9A,0xEA}; uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -3341,8 +3338,6 @@ uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) using ::apache::thrift::protocol::TProtocolException; - bool isset_lowValue = false; - bool isset_highValue = false; bool isset_numNulls = false; bool isset_numDVs = false; @@ -3357,7 +3352,7 @@ uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) case 1: if (ftype == ::apache::thrift::protocol::T_I64) { xfer += iprot->readI64(this->lowValue); - isset_lowValue = true; + this->__isset.lowValue = true; } else { xfer += iprot->skip(ftype); } @@ -3365,7 +3360,7 @@ uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) case 2: if (ftype == ::apache::thrift::protocol::T_I64) { xfer += iprot->readI64(this->highValue); - isset_highValue = true; + this->__isset.highValue = true; } else { xfer += iprot->skip(ftype); } @@ -3395,10 +3390,6 @@ uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) xfer += iprot->readStructEnd(); - if (!isset_lowValue) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_highValue) - throw TProtocolException(TProtocolException::INVALID_DATA); if (!isset_numNulls) throw TProtocolException(TProtocolException::INVALID_DATA); if (!isset_numDVs) @@ -3410,14 +3401,16 @@ uint32_t LongColumnStatsData::write(::apache::thrift::protocol::TProtocol* oprot uint32_t xfer = 0; xfer += oprot->writeStructBegin("LongColumnStatsData"); - xfer += oprot->writeFieldBegin("lowValue", ::apache::thrift::protocol::T_I64, 1); - xfer += oprot->writeI64(this->lowValue); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("highValue", ::apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->highValue); - xfer += oprot->writeFieldEnd(); - + if (this->__isset.lowValue) { + xfer += oprot->writeFieldBegin("lowValue", ::apache::thrift::protocol::T_I64, 1); + xfer += oprot->writeI64(this->lowValue); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.highValue) { + xfer += oprot->writeFieldBegin("highValue", ::apache::thrift::protocol::T_I64, 2); + xfer += oprot->writeI64(this->highValue); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldBegin("numNulls", ::apache::thrift::protocol::T_I64, 3); xfer += oprot->writeI64(this->numNulls); xfer += oprot->writeFieldEnd(); @@ -3437,6 +3430,7 @@ void swap(LongColumnStatsData &a, LongColumnStatsData &b) { swap(a.highValue, b.highValue); swap(a.numNulls, b.numNulls); swap(a.numDVs, b.numDVs); + swap(a.__isset, b.__isset); } const char* StringColumnStatsData::ascii_fingerprint = "D017B08C3DF12C3AB98788B2E67DAAB3"; @@ -3727,8 +3721,8 @@ void swap(Decimal &a, Decimal &b) { swap(a.scale, b.scale); } -const char* DecimalColumnStatsData::ascii_fingerprint = "3AE5C36598A014EE815B87600C3087B5"; -const uint8_t DecimalColumnStatsData::binary_fingerprint[16] = {0x3A,0xE5,0xC3,0x65,0x98,0xA0,0x14,0xEE,0x81,0x5B,0x87,0x60,0x0C,0x30,0x87,0xB5}; +const char* DecimalColumnStatsData::ascii_fingerprint = "B6D47E7A28922BFA93FE05E9F1B04748"; +const uint8_t DecimalColumnStatsData::binary_fingerprint[16] = {0xB6,0xD4,0x7E,0x7A,0x28,0x92,0x2B,0xFA,0x93,0xFE,0x05,0xE9,0xF1,0xB0,0x47,0x48}; uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -3741,8 +3735,6 @@ uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr using ::apache::thrift::protocol::TProtocolException; - bool isset_lowValue = false; - bool isset_highValue = false; bool isset_numNulls = false; bool isset_numDVs = false; @@ -3757,7 +3749,7 @@ uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr case 1: if (ftype == ::apache::thrift::protocol::T_STRUCT) { xfer += this->lowValue.read(iprot); - isset_lowValue = true; + this->__isset.lowValue = true; } else { xfer += iprot->skip(ftype); } @@ -3765,7 +3757,7 @@ uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr case 2: if (ftype == ::apache::thrift::protocol::T_STRUCT) { xfer += this->highValue.read(iprot); - isset_highValue = true; + this->__isset.highValue = true; } else { xfer += iprot->skip(ftype); } @@ -3795,10 +3787,6 @@ uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr xfer += iprot->readStructEnd(); - if (!isset_lowValue) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_highValue) - throw TProtocolException(TProtocolException::INVALID_DATA); if (!isset_numNulls) throw TProtocolException(TProtocolException::INVALID_DATA); if (!isset_numDVs) @@ -3810,14 +3798,16 @@ uint32_t DecimalColumnStatsData::write(::apache::thrift::protocol::TProtocol* op uint32_t xfer = 0; xfer += oprot->writeStructBegin("DecimalColumnStatsData"); - xfer += oprot->writeFieldBegin("lowValue", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->lowValue.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("highValue", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->highValue.write(oprot); - xfer += oprot->writeFieldEnd(); - + if (this->__isset.lowValue) { + xfer += oprot->writeFieldBegin("lowValue", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->lowValue.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.highValue) { + xfer += oprot->writeFieldBegin("highValue", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->highValue.write(oprot); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldBegin("numNulls", ::apache::thrift::protocol::T_I64, 3); xfer += oprot->writeI64(this->numNulls); xfer += oprot->writeFieldEnd(); @@ -3837,10 +3827,11 @@ void swap(DecimalColumnStatsData &a, DecimalColumnStatsData &b) { swap(a.highValue, b.highValue); swap(a.numNulls, b.numNulls); swap(a.numDVs, b.numDVs); + swap(a.__isset, b.__isset); } -const char* ColumnStatisticsData::ascii_fingerprint = "343F5865568AF7DA61829A616EB8C57C"; -const uint8_t ColumnStatisticsData::binary_fingerprint[16] = {0x34,0x3F,0x58,0x65,0x56,0x8A,0xF7,0xDA,0x61,0x82,0x9A,0x61,0x6E,0xB8,0xC5,0x7C}; +const char* ColumnStatisticsData::ascii_fingerprint = "D079ACEA6EE0998D0A45CB65FF1EAADD"; +const uint8_t ColumnStatisticsData::binary_fingerprint[16] = {0xD0,0x79,0xAC,0xEA,0x6E,0xE0,0x99,0x8D,0x0A,0x45,0xCB,0x65,0xFF,0x1E,0xAA,0xDD}; uint32_t ColumnStatisticsData::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -3966,8 +3957,8 @@ void swap(ColumnStatisticsData &a, ColumnStatisticsData &b) { swap(a.__isset, b.__isset); } -const char* ColumnStatisticsObj::ascii_fingerprint = "CFDBB9DFF4F1670367EA5356861EC180"; -const uint8_t ColumnStatisticsObj::binary_fingerprint[16] = {0xCF,0xDB,0xB9,0xDF,0xF4,0xF1,0x67,0x03,0x67,0xEA,0x53,0x56,0x86,0x1E,0xC1,0x80}; +const char* ColumnStatisticsObj::ascii_fingerprint = "E49E62CFC71682004614EFEDAC3CD3F4"; +const uint8_t ColumnStatisticsObj::binary_fingerprint[16] = {0xE4,0x9E,0x62,0xCF,0xC7,0x16,0x82,0x00,0x46,0x14,0xEF,0xED,0xAC,0x3C,0xD3,0xF4}; uint32_t ColumnStatisticsObj::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -4187,8 +4178,8 @@ void swap(ColumnStatisticsDesc &a, ColumnStatisticsDesc &b) { swap(a.__isset, b.__isset); } -const char* ColumnStatistics::ascii_fingerprint = "37AA2F226C29DF25254CCCE6A7DDBAF3"; -const uint8_t ColumnStatistics::binary_fingerprint[16] = {0x37,0xAA,0x2F,0x22,0x6C,0x29,0xDF,0x25,0x25,0x4C,0xCC,0xE6,0xA7,0xDD,0xBA,0xF3}; +const char* ColumnStatistics::ascii_fingerprint = "6682E234199B2CD3807B1ED420C6A7F8"; +const uint8_t ColumnStatistics::binary_fingerprint[16] = {0x66,0x82,0xE2,0x34,0x19,0x9B,0x2C,0xD3,0x80,0x7B,0x1E,0xD4,0x20,0xC6,0xA7,0xF8}; uint32_t ColumnStatistics::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -4717,8 +4708,8 @@ void swap(PartitionsByExprRequest &a, PartitionsByExprRequest &b) { swap(a.__isset, b.__isset); } -const char* TableStatsResult::ascii_fingerprint = "178DBEC75B48CDDEDB3B8338EF6FBF2F"; -const uint8_t TableStatsResult::binary_fingerprint[16] = {0x17,0x8D,0xBE,0xC7,0x5B,0x48,0xCD,0xDE,0xDB,0x3B,0x83,0x38,0xEF,0x6F,0xBF,0x2F}; +const char* TableStatsResult::ascii_fingerprint = "CE3E8F0D9B310B8D33CB7A89A75F3E05"; +const uint8_t TableStatsResult::binary_fingerprint[16] = {0xCE,0x3E,0x8F,0x0D,0x9B,0x31,0x0B,0x8D,0x33,0xCB,0x7A,0x89,0xA7,0x5F,0x3E,0x05}; uint32_t TableStatsResult::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -4801,8 +4792,8 @@ void swap(TableStatsResult &a, TableStatsResult &b) { swap(a.tableStats, b.tableStats); } -const char* PartitionsStatsResult::ascii_fingerprint = "0E3D549A0384CD453E2CB90C734A6245"; -const uint8_t PartitionsStatsResult::binary_fingerprint[16] = {0x0E,0x3D,0x54,0x9A,0x03,0x84,0xCD,0x45,0x3E,0x2C,0xB9,0x0C,0x73,0x4A,0x62,0x45}; +const char* PartitionsStatsResult::ascii_fingerprint = "FF175B50C5EF6F442D3AF25B06435A39"; +const uint8_t PartitionsStatsResult::binary_fingerprint[16] = {0xFF,0x17,0x5B,0x50,0xC5,0xEF,0x6F,0x44,0x2D,0x3A,0xF2,0x5B,0x06,0x43,0x5A,0x39}; uint32_t PartitionsStatsResult::read(::apache::thrift::protocol::TProtocol* iprot) { diff --git metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h index 43869c2..413256f 100644 --- metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h +++ metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h @@ -1741,12 +1741,17 @@ class BooleanColumnStatsData { void swap(BooleanColumnStatsData &a, BooleanColumnStatsData &b); +typedef struct _DoubleColumnStatsData__isset { + _DoubleColumnStatsData__isset() : lowValue(false), highValue(false) {} + bool lowValue; + bool highValue; +} _DoubleColumnStatsData__isset; class DoubleColumnStatsData { public: - static const char* ascii_fingerprint; // = "97F958CB0709C3B109A57EEE01946C13"; - static const uint8_t binary_fingerprint[16]; // = {0x97,0xF9,0x58,0xCB,0x07,0x09,0xC3,0xB1,0x09,0xA5,0x7E,0xEE,0x01,0x94,0x6C,0x13}; + static const char* ascii_fingerprint; // = "DA7C011321D74C48396AA002E61A0CBB"; + static const uint8_t binary_fingerprint[16]; // = {0xDA,0x7C,0x01,0x13,0x21,0xD7,0x4C,0x48,0x39,0x6A,0xA0,0x02,0xE6,0x1A,0x0C,0xBB}; DoubleColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0) { } @@ -1758,12 +1763,16 @@ class DoubleColumnStatsData { int64_t numNulls; int64_t numDVs; + _DoubleColumnStatsData__isset __isset; + void __set_lowValue(const double val) { lowValue = val; + __isset.lowValue = true; } void __set_highValue(const double val) { highValue = val; + __isset.highValue = true; } void __set_numNulls(const int64_t val) { @@ -1776,9 +1785,13 @@ class DoubleColumnStatsData { bool operator == (const DoubleColumnStatsData & rhs) const { - if (!(lowValue == rhs.lowValue)) + if (__isset.lowValue != rhs.__isset.lowValue) + return false; + else if (__isset.lowValue && !(lowValue == rhs.lowValue)) + return false; + if (__isset.highValue != rhs.__isset.highValue) return false; - if (!(highValue == rhs.highValue)) + else if (__isset.highValue && !(highValue == rhs.highValue)) return false; if (!(numNulls == rhs.numNulls)) return false; @@ -1799,12 +1812,17 @@ class DoubleColumnStatsData { void swap(DoubleColumnStatsData &a, DoubleColumnStatsData &b); +typedef struct _LongColumnStatsData__isset { + _LongColumnStatsData__isset() : lowValue(false), highValue(false) {} + bool lowValue; + bool highValue; +} _LongColumnStatsData__isset; class LongColumnStatsData { public: - static const char* ascii_fingerprint; // = "66C8E6F97F0275919D86FEB536251B17"; - static const uint8_t binary_fingerprint[16]; // = {0x66,0xC8,0xE6,0xF9,0x7F,0x02,0x75,0x91,0x9D,0x86,0xFE,0xB5,0x36,0x25,0x1B,0x17}; + static const char* ascii_fingerprint; // = "E685FC220B24E3B8B93604790DCB9AEA"; + static const uint8_t binary_fingerprint[16]; // = {0xE6,0x85,0xFC,0x22,0x0B,0x24,0xE3,0xB8,0xB9,0x36,0x04,0x79,0x0D,0xCB,0x9A,0xEA}; LongColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0) { } @@ -1816,12 +1834,16 @@ class LongColumnStatsData { int64_t numNulls; int64_t numDVs; + _LongColumnStatsData__isset __isset; + void __set_lowValue(const int64_t val) { lowValue = val; + __isset.lowValue = true; } void __set_highValue(const int64_t val) { highValue = val; + __isset.highValue = true; } void __set_numNulls(const int64_t val) { @@ -1834,9 +1856,13 @@ class LongColumnStatsData { bool operator == (const LongColumnStatsData & rhs) const { - if (!(lowValue == rhs.lowValue)) + if (__isset.lowValue != rhs.__isset.lowValue) + return false; + else if (__isset.lowValue && !(lowValue == rhs.lowValue)) return false; - if (!(highValue == rhs.highValue)) + if (__isset.highValue != rhs.__isset.highValue) + return false; + else if (__isset.highValue && !(highValue == rhs.highValue)) return false; if (!(numNulls == rhs.numNulls)) return false; @@ -2010,12 +2036,17 @@ class Decimal { void swap(Decimal &a, Decimal &b); +typedef struct _DecimalColumnStatsData__isset { + _DecimalColumnStatsData__isset() : lowValue(false), highValue(false) {} + bool lowValue; + bool highValue; +} _DecimalColumnStatsData__isset; class DecimalColumnStatsData { public: - static const char* ascii_fingerprint; // = "3AE5C36598A014EE815B87600C3087B5"; - static const uint8_t binary_fingerprint[16]; // = {0x3A,0xE5,0xC3,0x65,0x98,0xA0,0x14,0xEE,0x81,0x5B,0x87,0x60,0x0C,0x30,0x87,0xB5}; + static const char* ascii_fingerprint; // = "B6D47E7A28922BFA93FE05E9F1B04748"; + static const uint8_t binary_fingerprint[16]; // = {0xB6,0xD4,0x7E,0x7A,0x28,0x92,0x2B,0xFA,0x93,0xFE,0x05,0xE9,0xF1,0xB0,0x47,0x48}; DecimalColumnStatsData() : numNulls(0), numDVs(0) { } @@ -2027,12 +2058,16 @@ class DecimalColumnStatsData { int64_t numNulls; int64_t numDVs; + _DecimalColumnStatsData__isset __isset; + void __set_lowValue(const Decimal& val) { lowValue = val; + __isset.lowValue = true; } void __set_highValue(const Decimal& val) { highValue = val; + __isset.highValue = true; } void __set_numNulls(const int64_t val) { @@ -2045,9 +2080,13 @@ class DecimalColumnStatsData { bool operator == (const DecimalColumnStatsData & rhs) const { - if (!(lowValue == rhs.lowValue)) + if (__isset.lowValue != rhs.__isset.lowValue) + return false; + else if (__isset.lowValue && !(lowValue == rhs.lowValue)) + return false; + if (__isset.highValue != rhs.__isset.highValue) return false; - if (!(highValue == rhs.highValue)) + else if (__isset.highValue && !(highValue == rhs.highValue)) return false; if (!(numNulls == rhs.numNulls)) return false; @@ -2081,8 +2120,8 @@ typedef struct _ColumnStatisticsData__isset { class ColumnStatisticsData { public: - static const char* ascii_fingerprint; // = "343F5865568AF7DA61829A616EB8C57C"; - static const uint8_t binary_fingerprint[16]; // = {0x34,0x3F,0x58,0x65,0x56,0x8A,0xF7,0xDA,0x61,0x82,0x9A,0x61,0x6E,0xB8,0xC5,0x7C}; + static const char* ascii_fingerprint; // = "D079ACEA6EE0998D0A45CB65FF1EAADD"; + static const uint8_t binary_fingerprint[16]; // = {0xD0,0x79,0xAC,0xEA,0x6E,0xE0,0x99,0x8D,0x0A,0x45,0xCB,0x65,0xFF,0x1E,0xAA,0xDD}; ColumnStatisticsData() { } @@ -2155,8 +2194,8 @@ void swap(ColumnStatisticsData &a, ColumnStatisticsData &b); class ColumnStatisticsObj { public: - static const char* ascii_fingerprint; // = "CFDBB9DFF4F1670367EA5356861EC180"; - static const uint8_t binary_fingerprint[16]; // = {0xCF,0xDB,0xB9,0xDF,0xF4,0xF1,0x67,0x03,0x67,0xEA,0x53,0x56,0x86,0x1E,0xC1,0x80}; + static const char* ascii_fingerprint; // = "E49E62CFC71682004614EFEDAC3CD3F4"; + static const uint8_t binary_fingerprint[16]; // = {0xE4,0x9E,0x62,0xCF,0xC7,0x16,0x82,0x00,0x46,0x14,0xEF,0xED,0xAC,0x3C,0xD3,0xF4}; ColumnStatisticsObj() : colName(), colType() { } @@ -2284,8 +2323,8 @@ void swap(ColumnStatisticsDesc &a, ColumnStatisticsDesc &b); class ColumnStatistics { public: - static const char* ascii_fingerprint; // = "37AA2F226C29DF25254CCCE6A7DDBAF3"; - static const uint8_t binary_fingerprint[16]; // = {0x37,0xAA,0x2F,0x22,0x6C,0x29,0xDF,0x25,0x25,0x4C,0xCC,0xE6,0xA7,0xDD,0xBA,0xF3}; + static const char* ascii_fingerprint; // = "6682E234199B2CD3807B1ED420C6A7F8"; + static const uint8_t binary_fingerprint[16]; // = {0x66,0x82,0xE2,0x34,0x19,0x9B,0x2C,0xD3,0x80,0x7B,0x1E,0xD4,0x20,0xC6,0xA7,0xF8}; ColumnStatistics() { } @@ -2544,8 +2583,8 @@ void swap(PartitionsByExprRequest &a, PartitionsByExprRequest &b); class TableStatsResult { public: - static const char* ascii_fingerprint; // = "178DBEC75B48CDDEDB3B8338EF6FBF2F"; - static const uint8_t binary_fingerprint[16]; // = {0x17,0x8D,0xBE,0xC7,0x5B,0x48,0xCD,0xDE,0xDB,0x3B,0x83,0x38,0xEF,0x6F,0xBF,0x2F}; + static const char* ascii_fingerprint; // = "CE3E8F0D9B310B8D33CB7A89A75F3E05"; + static const uint8_t binary_fingerprint[16]; // = {0xCE,0x3E,0x8F,0x0D,0x9B,0x31,0x0B,0x8D,0x33,0xCB,0x7A,0x89,0xA7,0x5F,0x3E,0x05}; TableStatsResult() { } @@ -2581,8 +2620,8 @@ void swap(TableStatsResult &a, TableStatsResult &b); class PartitionsStatsResult { public: - static const char* ascii_fingerprint; // = "0E3D549A0384CD453E2CB90C734A6245"; - static const uint8_t binary_fingerprint[16]; // = {0x0E,0x3D,0x54,0x9A,0x03,0x84,0xCD,0x45,0x3E,0x2C,0xB9,0x0C,0x73,0x4A,0x62,0x45}; + static const char* ascii_fingerprint; // = "FF175B50C5EF6F442D3AF25B06435A39"; + static const uint8_t binary_fingerprint[16]; // = {0xFF,0x17,0x5B,0x50,0xC5,0xEF,0x6F,0x44,0x2D,0x3A,0xF2,0x5B,0x06,0x43,0x5A,0x39}; PartitionsStatsResult() { } diff --git metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java index 5661252..951d479 100644 --- metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java +++ metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java @@ -45,8 +45,8 @@ schemes.put(TupleScheme.class, new DecimalColumnStatsDataTupleSchemeFactory()); } - private Decimal lowValue; // required - private Decimal highValue; // required + private Decimal lowValue; // optional + private Decimal highValue; // optional private long numNulls; // required private long numDVs; // required @@ -121,12 +121,13 @@ public String getFieldName() { private static final int __NUMNULLS_ISSET_ID = 0; private static final int __NUMDVS_ISSET_ID = 1; private byte __isset_bitfield = 0; + private _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.LOW_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lowValue", org.apache.thrift.TFieldRequirementType.REQUIRED, + tmpMap.put(_Fields.LOW_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lowValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, Decimal.class))); - tmpMap.put(_Fields.HIGH_VALUE, new org.apache.thrift.meta_data.FieldMetaData("highValue", org.apache.thrift.TFieldRequirementType.REQUIRED, + tmpMap.put(_Fields.HIGH_VALUE, new org.apache.thrift.meta_data.FieldMetaData("highValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, Decimal.class))); tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); @@ -140,14 +141,10 @@ public DecimalColumnStatsData() { } public DecimalColumnStatsData( - Decimal lowValue, - Decimal highValue, long numNulls, long numDVs) { this(); - this.lowValue = lowValue; - this.highValue = highValue; this.numNulls = numNulls; setNumNullsIsSet(true); this.numDVs = numDVs; @@ -494,21 +491,25 @@ public String toString() { StringBuilder sb = new StringBuilder("DecimalColumnStatsData("); boolean first = true; - sb.append("lowValue:"); - if (this.lowValue == null) { - sb.append("null"); - } else { - sb.append(this.lowValue); + if (isSetLowValue()) { + sb.append("lowValue:"); + if (this.lowValue == null) { + sb.append("null"); + } else { + sb.append(this.lowValue); + } + first = false; } - first = false; - if (!first) sb.append(", "); - sb.append("highValue:"); - if (this.highValue == null) { - sb.append("null"); - } else { - sb.append(this.highValue); + if (isSetHighValue()) { + if (!first) sb.append(", "); + sb.append("highValue:"); + if (this.highValue == null) { + sb.append("null"); + } else { + sb.append(this.highValue); + } + first = false; } - first = false; if (!first) sb.append(", "); sb.append("numNulls:"); sb.append(this.numNulls); @@ -523,14 +524,6 @@ public String toString() { public void validate() throws org.apache.thrift.TException { // check for required fields - if (!isSetLowValue()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'lowValue' is unset! Struct:" + toString()); - } - - if (!isSetHighValue()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'highValue' is unset! Struct:" + toString()); - } - if (!isSetNumNulls()) { throw new org.apache.thrift.protocol.TProtocolException("Required field 'numNulls' is unset! Struct:" + toString()); } @@ -632,14 +625,18 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, DecimalColumnStats oprot.writeStructBegin(STRUCT_DESC); if (struct.lowValue != null) { - oprot.writeFieldBegin(LOW_VALUE_FIELD_DESC); - struct.lowValue.write(oprot); - oprot.writeFieldEnd(); + if (struct.isSetLowValue()) { + oprot.writeFieldBegin(LOW_VALUE_FIELD_DESC); + struct.lowValue.write(oprot); + oprot.writeFieldEnd(); + } } if (struct.highValue != null) { - oprot.writeFieldBegin(HIGH_VALUE_FIELD_DESC); - struct.highValue.write(oprot); - oprot.writeFieldEnd(); + if (struct.isSetHighValue()) { + oprot.writeFieldBegin(HIGH_VALUE_FIELD_DESC); + struct.highValue.write(oprot); + oprot.writeFieldEnd(); + } } oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC); oprot.writeI64(struct.numNulls); @@ -664,25 +661,42 @@ public DecimalColumnStatsDataTupleScheme getScheme() { @Override public void write(org.apache.thrift.protocol.TProtocol prot, DecimalColumnStatsData struct) throws org.apache.thrift.TException { TTupleProtocol oprot = (TTupleProtocol) prot; - struct.lowValue.write(oprot); - struct.highValue.write(oprot); oprot.writeI64(struct.numNulls); oprot.writeI64(struct.numDVs); + BitSet optionals = new BitSet(); + if (struct.isSetLowValue()) { + optionals.set(0); + } + if (struct.isSetHighValue()) { + optionals.set(1); + } + oprot.writeBitSet(optionals, 2); + if (struct.isSetLowValue()) { + struct.lowValue.write(oprot); + } + if (struct.isSetHighValue()) { + struct.highValue.write(oprot); + } } @Override public void read(org.apache.thrift.protocol.TProtocol prot, DecimalColumnStatsData struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; - struct.lowValue = new Decimal(); - struct.lowValue.read(iprot); - struct.setLowValueIsSet(true); - struct.highValue = new Decimal(); - struct.highValue.read(iprot); - struct.setHighValueIsSet(true); struct.numNulls = iprot.readI64(); struct.setNumNullsIsSet(true); struct.numDVs = iprot.readI64(); struct.setNumDVsIsSet(true); + BitSet incoming = iprot.readBitSet(2); + if (incoming.get(0)) { + struct.lowValue = new Decimal(); + struct.lowValue.read(iprot); + struct.setLowValueIsSet(true); + } + if (incoming.get(1)) { + struct.highValue = new Decimal(); + struct.highValue.read(iprot); + struct.setHighValueIsSet(true); + } } } diff --git metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java index d3f3f68..4203fd8 100644 --- metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java +++ metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java @@ -45,8 +45,8 @@ schemes.put(TupleScheme.class, new DoubleColumnStatsDataTupleSchemeFactory()); } - private double lowValue; // required - private double highValue; // required + private double lowValue; // optional + private double highValue; // optional private long numNulls; // required private long numDVs; // required @@ -123,12 +123,13 @@ public String getFieldName() { private static final int __NUMNULLS_ISSET_ID = 2; private static final int __NUMDVS_ISSET_ID = 3; private byte __isset_bitfield = 0; + private _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.LOW_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lowValue", org.apache.thrift.TFieldRequirementType.REQUIRED, + tmpMap.put(_Fields.LOW_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lowValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - tmpMap.put(_Fields.HIGH_VALUE, new org.apache.thrift.meta_data.FieldMetaData("highValue", org.apache.thrift.TFieldRequirementType.REQUIRED, + tmpMap.put(_Fields.HIGH_VALUE, new org.apache.thrift.meta_data.FieldMetaData("highValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); @@ -142,16 +143,10 @@ public DoubleColumnStatsData() { } public DoubleColumnStatsData( - double lowValue, - double highValue, long numNulls, long numDVs) { this(); - this.lowValue = lowValue; - setLowValueIsSet(true); - this.highValue = highValue; - setHighValueIsSet(true); this.numNulls = numNulls; setNumNullsIsSet(true); this.numDVs = numDVs; @@ -360,8 +355,8 @@ public boolean equals(DoubleColumnStatsData that) { if (that == null) return false; - boolean this_present_lowValue = true; - boolean that_present_lowValue = true; + boolean this_present_lowValue = true && this.isSetLowValue(); + boolean that_present_lowValue = true && that.isSetLowValue(); if (this_present_lowValue || that_present_lowValue) { if (!(this_present_lowValue && that_present_lowValue)) return false; @@ -369,8 +364,8 @@ public boolean equals(DoubleColumnStatsData that) { return false; } - boolean this_present_highValue = true; - boolean that_present_highValue = true; + boolean this_present_highValue = true && this.isSetHighValue(); + boolean that_present_highValue = true && that.isSetHighValue(); if (this_present_highValue || that_present_highValue) { if (!(this_present_highValue && that_present_highValue)) return false; @@ -403,12 +398,12 @@ public boolean equals(DoubleColumnStatsData that) { public int hashCode() { HashCodeBuilder builder = new HashCodeBuilder(); - boolean present_lowValue = true; + boolean present_lowValue = true && (isSetLowValue()); builder.append(present_lowValue); if (present_lowValue) builder.append(lowValue); - boolean present_highValue = true; + boolean present_highValue = true && (isSetHighValue()); builder.append(present_highValue); if (present_highValue) builder.append(highValue); @@ -494,13 +489,17 @@ public String toString() { StringBuilder sb = new StringBuilder("DoubleColumnStatsData("); boolean first = true; - sb.append("lowValue:"); - sb.append(this.lowValue); - first = false; - if (!first) sb.append(", "); - sb.append("highValue:"); - sb.append(this.highValue); - first = false; + if (isSetLowValue()) { + sb.append("lowValue:"); + sb.append(this.lowValue); + first = false; + } + if (isSetHighValue()) { + if (!first) sb.append(", "); + sb.append("highValue:"); + sb.append(this.highValue); + first = false; + } if (!first) sb.append(", "); sb.append("numNulls:"); sb.append(this.numNulls); @@ -515,14 +514,6 @@ public String toString() { public void validate() throws org.apache.thrift.TException { // check for required fields - if (!isSetLowValue()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'lowValue' is unset! Struct:" + toString()); - } - - if (!isSetHighValue()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'highValue' is unset! Struct:" + toString()); - } - if (!isSetNumNulls()) { throw new org.apache.thrift.protocol.TProtocolException("Required field 'numNulls' is unset! Struct:" + toString()); } @@ -615,12 +606,16 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, DoubleColumnStatsD struct.validate(); oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(LOW_VALUE_FIELD_DESC); - oprot.writeDouble(struct.lowValue); - oprot.writeFieldEnd(); - oprot.writeFieldBegin(HIGH_VALUE_FIELD_DESC); - oprot.writeDouble(struct.highValue); - oprot.writeFieldEnd(); + if (struct.isSetLowValue()) { + oprot.writeFieldBegin(LOW_VALUE_FIELD_DESC); + oprot.writeDouble(struct.lowValue); + oprot.writeFieldEnd(); + } + if (struct.isSetHighValue()) { + oprot.writeFieldBegin(HIGH_VALUE_FIELD_DESC); + oprot.writeDouble(struct.highValue); + oprot.writeFieldEnd(); + } oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC); oprot.writeI64(struct.numNulls); oprot.writeFieldEnd(); @@ -644,23 +639,40 @@ public DoubleColumnStatsDataTupleScheme getScheme() { @Override public void write(org.apache.thrift.protocol.TProtocol prot, DoubleColumnStatsData struct) throws org.apache.thrift.TException { TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeDouble(struct.lowValue); - oprot.writeDouble(struct.highValue); oprot.writeI64(struct.numNulls); oprot.writeI64(struct.numDVs); + BitSet optionals = new BitSet(); + if (struct.isSetLowValue()) { + optionals.set(0); + } + if (struct.isSetHighValue()) { + optionals.set(1); + } + oprot.writeBitSet(optionals, 2); + if (struct.isSetLowValue()) { + oprot.writeDouble(struct.lowValue); + } + if (struct.isSetHighValue()) { + oprot.writeDouble(struct.highValue); + } } @Override public void read(org.apache.thrift.protocol.TProtocol prot, DoubleColumnStatsData struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; - struct.lowValue = iprot.readDouble(); - struct.setLowValueIsSet(true); - struct.highValue = iprot.readDouble(); - struct.setHighValueIsSet(true); struct.numNulls = iprot.readI64(); struct.setNumNullsIsSet(true); struct.numDVs = iprot.readI64(); struct.setNumDVsIsSet(true); + BitSet incoming = iprot.readBitSet(2); + if (incoming.get(0)) { + struct.lowValue = iprot.readDouble(); + struct.setLowValueIsSet(true); + } + if (incoming.get(1)) { + struct.highValue = iprot.readDouble(); + struct.setHighValueIsSet(true); + } } } diff --git metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java index 2cf4380..d817d46 100644 --- metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java +++ metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java @@ -45,8 +45,8 @@ schemes.put(TupleScheme.class, new LongColumnStatsDataTupleSchemeFactory()); } - private long lowValue; // required - private long highValue; // required + private long lowValue; // optional + private long highValue; // optional private long numNulls; // required private long numDVs; // required @@ -123,12 +123,13 @@ public String getFieldName() { private static final int __NUMNULLS_ISSET_ID = 2; private static final int __NUMDVS_ISSET_ID = 3; private byte __isset_bitfield = 0; + private _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.LOW_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lowValue", org.apache.thrift.TFieldRequirementType.REQUIRED, + tmpMap.put(_Fields.LOW_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lowValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.HIGH_VALUE, new org.apache.thrift.meta_data.FieldMetaData("highValue", org.apache.thrift.TFieldRequirementType.REQUIRED, + tmpMap.put(_Fields.HIGH_VALUE, new org.apache.thrift.meta_data.FieldMetaData("highValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); @@ -142,16 +143,10 @@ public LongColumnStatsData() { } public LongColumnStatsData( - long lowValue, - long highValue, long numNulls, long numDVs) { this(); - this.lowValue = lowValue; - setLowValueIsSet(true); - this.highValue = highValue; - setHighValueIsSet(true); this.numNulls = numNulls; setNumNullsIsSet(true); this.numDVs = numDVs; @@ -360,8 +355,8 @@ public boolean equals(LongColumnStatsData that) { if (that == null) return false; - boolean this_present_lowValue = true; - boolean that_present_lowValue = true; + boolean this_present_lowValue = true && this.isSetLowValue(); + boolean that_present_lowValue = true && that.isSetLowValue(); if (this_present_lowValue || that_present_lowValue) { if (!(this_present_lowValue && that_present_lowValue)) return false; @@ -369,8 +364,8 @@ public boolean equals(LongColumnStatsData that) { return false; } - boolean this_present_highValue = true; - boolean that_present_highValue = true; + boolean this_present_highValue = true && this.isSetHighValue(); + boolean that_present_highValue = true && that.isSetHighValue(); if (this_present_highValue || that_present_highValue) { if (!(this_present_highValue && that_present_highValue)) return false; @@ -403,12 +398,12 @@ public boolean equals(LongColumnStatsData that) { public int hashCode() { HashCodeBuilder builder = new HashCodeBuilder(); - boolean present_lowValue = true; + boolean present_lowValue = true && (isSetLowValue()); builder.append(present_lowValue); if (present_lowValue) builder.append(lowValue); - boolean present_highValue = true; + boolean present_highValue = true && (isSetHighValue()); builder.append(present_highValue); if (present_highValue) builder.append(highValue); @@ -494,13 +489,17 @@ public String toString() { StringBuilder sb = new StringBuilder("LongColumnStatsData("); boolean first = true; - sb.append("lowValue:"); - sb.append(this.lowValue); - first = false; - if (!first) sb.append(", "); - sb.append("highValue:"); - sb.append(this.highValue); - first = false; + if (isSetLowValue()) { + sb.append("lowValue:"); + sb.append(this.lowValue); + first = false; + } + if (isSetHighValue()) { + if (!first) sb.append(", "); + sb.append("highValue:"); + sb.append(this.highValue); + first = false; + } if (!first) sb.append(", "); sb.append("numNulls:"); sb.append(this.numNulls); @@ -515,14 +514,6 @@ public String toString() { public void validate() throws org.apache.thrift.TException { // check for required fields - if (!isSetLowValue()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'lowValue' is unset! Struct:" + toString()); - } - - if (!isSetHighValue()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'highValue' is unset! Struct:" + toString()); - } - if (!isSetNumNulls()) { throw new org.apache.thrift.protocol.TProtocolException("Required field 'numNulls' is unset! Struct:" + toString()); } @@ -615,12 +606,16 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, LongColumnStatsDat struct.validate(); oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(LOW_VALUE_FIELD_DESC); - oprot.writeI64(struct.lowValue); - oprot.writeFieldEnd(); - oprot.writeFieldBegin(HIGH_VALUE_FIELD_DESC); - oprot.writeI64(struct.highValue); - oprot.writeFieldEnd(); + if (struct.isSetLowValue()) { + oprot.writeFieldBegin(LOW_VALUE_FIELD_DESC); + oprot.writeI64(struct.lowValue); + oprot.writeFieldEnd(); + } + if (struct.isSetHighValue()) { + oprot.writeFieldBegin(HIGH_VALUE_FIELD_DESC); + oprot.writeI64(struct.highValue); + oprot.writeFieldEnd(); + } oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC); oprot.writeI64(struct.numNulls); oprot.writeFieldEnd(); @@ -644,23 +639,40 @@ public LongColumnStatsDataTupleScheme getScheme() { @Override public void write(org.apache.thrift.protocol.TProtocol prot, LongColumnStatsData struct) throws org.apache.thrift.TException { TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI64(struct.lowValue); - oprot.writeI64(struct.highValue); oprot.writeI64(struct.numNulls); oprot.writeI64(struct.numDVs); + BitSet optionals = new BitSet(); + if (struct.isSetLowValue()) { + optionals.set(0); + } + if (struct.isSetHighValue()) { + optionals.set(1); + } + oprot.writeBitSet(optionals, 2); + if (struct.isSetLowValue()) { + oprot.writeI64(struct.lowValue); + } + if (struct.isSetHighValue()) { + oprot.writeI64(struct.highValue); + } } @Override public void read(org.apache.thrift.protocol.TProtocol prot, LongColumnStatsData struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; - struct.lowValue = iprot.readI64(); - struct.setLowValueIsSet(true); - struct.highValue = iprot.readI64(); - struct.setHighValueIsSet(true); struct.numNulls = iprot.readI64(); struct.setNumNullsIsSet(true); struct.numDVs = iprot.readI64(); struct.setNumDVsIsSet(true); + BitSet incoming = iprot.readBitSet(2); + if (incoming.get(0)) { + struct.lowValue = iprot.readI64(); + struct.setLowValueIsSet(true); + } + if (incoming.get(1)) { + struct.highValue = iprot.readI64(); + struct.setHighValueIsSet(true); + } } } diff --git metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote old mode 100644 new mode 100755 diff --git metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py index c4b583b..b3eeb89 100644 --- metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -2877,10 +2877,6 @@ def write(self, oprot): oprot.writeStructEnd() def validate(self): - if self.lowValue is None: - raise TProtocol.TProtocolException(message='Required field lowValue is unset!') - if self.highValue is None: - raise TProtocol.TProtocolException(message='Required field highValue is unset!') if self.numNulls is None: raise TProtocol.TProtocolException(message='Required field numNulls is unset!') if self.numDVs is None: @@ -2981,10 +2977,6 @@ def write(self, oprot): oprot.writeStructEnd() def validate(self): - if self.lowValue is None: - raise TProtocol.TProtocolException(message='Required field lowValue is unset!') - if self.highValue is None: - raise TProtocol.TProtocolException(message='Required field highValue is unset!') if self.numNulls is None: raise TProtocol.TProtocolException(message='Required field numNulls is unset!') if self.numDVs is None: @@ -3358,10 +3350,6 @@ def write(self, oprot): oprot.writeStructEnd() def validate(self): - if self.lowValue is None: - raise TProtocol.TProtocolException(message='Required field lowValue is unset!') - if self.highValue is None: - raise TProtocol.TProtocolException(message='Required field highValue is unset!') if self.numNulls is None: raise TProtocol.TProtocolException(message='Required field numNulls is unset!') if self.numDVs is None: diff --git metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb index 79b7a1a..757461f 100644 --- metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -651,8 +651,8 @@ class DoubleColumnStatsData NUMDVS = 4 FIELDS = { - LOWVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'lowValue'}, - HIGHVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'highValue'}, + LOWVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'lowValue', :optional => true}, + HIGHVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'highValue', :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'} } @@ -660,8 +660,6 @@ class DoubleColumnStatsData def struct_fields; FIELDS; end def validate - raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field lowValue is unset!') unless @lowValue - raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field highValue is unset!') unless @highValue raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numNulls is unset!') unless @numNulls raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numDVs is unset!') unless @numDVs end @@ -677,8 +675,8 @@ class LongColumnStatsData NUMDVS = 4 FIELDS = { - LOWVALUE => {:type => ::Thrift::Types::I64, :name => 'lowValue'}, - HIGHVALUE => {:type => ::Thrift::Types::I64, :name => 'highValue'}, + LOWVALUE => {:type => ::Thrift::Types::I64, :name => 'lowValue', :optional => true}, + HIGHVALUE => {:type => ::Thrift::Types::I64, :name => 'highValue', :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'} } @@ -686,8 +684,6 @@ class LongColumnStatsData def struct_fields; FIELDS; end def validate - raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field lowValue is unset!') unless @lowValue - raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field highValue is unset!') unless @highValue raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numNulls is unset!') unless @numNulls raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numDVs is unset!') unless @numDVs end @@ -772,8 +768,8 @@ class DecimalColumnStatsData NUMDVS = 4 FIELDS = { - LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Decimal}, - HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Decimal}, + LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Decimal, :optional => true}, + HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Decimal, :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'} } @@ -781,8 +777,6 @@ class DecimalColumnStatsData def struct_fields; FIELDS; end def validate - raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field lowValue is unset!') unless @lowValue - raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field highValue is unset!') unless @highValue raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numNulls is unset!') unless @numNulls raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numDVs is unset!') unless @numDVs end diff --git metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index dc0e266..c6b3789 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -21,12 +21,9 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -38,10 +35,8 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; -import org.apache.hadoop.hive.metastore.model.MFieldSchema; import org.apache.hadoop.hive.metastore.model.MPartition; import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics; -import org.apache.hadoop.hive.metastore.model.MStorageDescriptor; import org.apache.hadoop.hive.metastore.model.MTable; import org.apache.hadoop.hive.metastore.model.MTableColumnStatistics; @@ -74,15 +69,17 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl } else if (statsObj.getStatsData().isSetLongStats()) { LongColumnStatsData longStats = statsObj.getStatsData().getLongStats(); mColStats.setLongStats(longStats.getNumNulls(), longStats.getNumDVs(), - longStats.getLowValue(), longStats.getHighValue()); + longStats.isSetLowValue() ? longStats.getLowValue() : null, + longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(), - doubleStats.getLowValue(), doubleStats.getHighValue()); + doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, + doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats(); - String low = createJdoDecimalString(decimalStats.getLowValue()), - high = createJdoDecimalString(decimalStats.getHighValue()); + String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null; + String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null; mColStats.setDecimalStats(decimalStats.getNumNulls(), decimalStats.getNumDVs(), low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); @@ -99,18 +96,30 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl public static void setFieldsIntoOldStats( MTableColumnStatistics mStatsObj, MTableColumnStatistics oldStatsObj) { oldStatsObj.setAvgColLen(mStatsObj.getAvgColLen()); - oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); - oldStatsObj.setDoubleHighValue(mStatsObj.getDoubleHighValue()); - oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); - oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); - oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue()); - oldStatsObj.setDecimalLowValue(mStatsObj.getDecimalLowValue()); - oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); + if (mStatsObj.getLongHighValue() != null) { + oldStatsObj.setLongHighValue(mStatsObj.getLongHighValue()); + } + if (mStatsObj.getLongLowValue() != null) { + oldStatsObj.setLongLowValue(mStatsObj.getLongLowValue()); + } + if (mStatsObj.getDoubleLowValue() != null) { + oldStatsObj.setDoubleLowValue(mStatsObj.getDoubleLowValue()); + } + if (mStatsObj.getDoubleHighValue() != null) { + oldStatsObj.setDoubleHighValue(mStatsObj.getDoubleHighValue()); + } + if (mStatsObj.getDecimalLowValue() != null) { + oldStatsObj.setDecimalLowValue(mStatsObj.getDecimalLowValue()); + } + if (mStatsObj.getDecimalHighValue() != null) { + oldStatsObj.setDecimalHighValue(mStatsObj.getDecimalHighValue()); + } oldStatsObj.setMaxColLen(mStatsObj.getMaxColLen()); oldStatsObj.setNumDVs(mStatsObj.getNumDVs()); oldStatsObj.setNumFalses(mStatsObj.getNumFalses()); oldStatsObj.setNumTrues(mStatsObj.getNumTrues()); oldStatsObj.setNumNulls(mStatsObj.getNumNulls()); + oldStatsObj.setLastAnalyzed(mStatsObj.getLastAnalyzed()); } public static void setFieldsIntoOldStats( @@ -163,22 +172,40 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( colType.equals("timestamp")) { LongColumnStatsData longStats = new LongColumnStatsData(); longStats.setNumNulls(mStatsObj.getNumNulls()); - longStats.setHighValue(mStatsObj.getLongHighValue()); - longStats.setLowValue(mStatsObj.getLongLowValue()); + Long longHighValue = mStatsObj.getLongHighValue(); + if (longHighValue != null) { + longStats.setHighValue(longHighValue); + } + Long longLowValue = mStatsObj.getLongLowValue(); + if (longLowValue != null) { + longStats.setLowValue(longLowValue); + } longStats.setNumDVs(mStatsObj.getNumDVs()); colStatsData.setLongStats(longStats); } else if (colType.equals("double") || colType.equals("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); doubleStats.setNumNulls(mStatsObj.getNumNulls()); - doubleStats.setHighValue(mStatsObj.getDoubleHighValue()); - doubleStats.setLowValue(mStatsObj.getDoubleLowValue()); + Double doubleHighValue = mStatsObj.getDoubleHighValue(); + if (doubleHighValue != null) { + doubleStats.setHighValue(doubleHighValue); + } + Double doubleLowValue = mStatsObj.getDoubleLowValue(); + if (doubleLowValue != null) { + doubleStats.setLowValue(doubleLowValue); + } doubleStats.setNumDVs(mStatsObj.getNumDVs()); colStatsData.setDoubleStats(doubleStats); } else if (colType.equals("decimal")) { DecimalColumnStatsData decimalStats = new DecimalColumnStatsData(); decimalStats.setNumNulls(mStatsObj.getNumNulls()); - decimalStats.setHighValue(createThriftDecimal(mStatsObj.getDecimalHighValue())); - decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue())); + String decimalHighValue = mStatsObj.getDecimalHighValue(); + if (decimalHighValue != null) { + decimalStats.setHighValue(createThriftDecimal(decimalHighValue)); + } + String decimalLowValue = mStatsObj.getDecimalLowValue(); + if (decimalLowValue != null) { + decimalStats.setLowValue(createThriftDecimal(decimalLowValue)); + } decimalStats.setNumDVs(mStatsObj.getNumDVs()); colStatsData.setDecimalStats(decimalStats); } @@ -219,15 +246,17 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( } else if (statsObj.getStatsData().isSetLongStats()) { LongColumnStatsData longStats = statsObj.getStatsData().getLongStats(); mColStats.setLongStats(longStats.getNumNulls(), longStats.getNumDVs(), - longStats.getLowValue(), longStats.getHighValue()); + longStats.isSetLowValue() ? longStats.getLowValue() : null, + longStats.isSetHighValue() ? longStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDoubleStats()) { DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); mColStats.setDoubleStats(doubleStats.getNumNulls(), doubleStats.getNumDVs(), - doubleStats.getLowValue(), doubleStats.getHighValue()); + doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, + doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats(); - String low = createJdoDecimalString(decimalStats.getLowValue()), - high = createJdoDecimalString(decimalStats.getHighValue()); + String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null; + String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null; mColStats.setDecimalStats(decimalStats.getNumNulls(), decimalStats.getNumDVs(), low, high); } else if (statsObj.getStatsData().isSetStringStats()) { StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats(); diff --git metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java index f61cdf0..1245d80 100644 --- metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java +++ metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java @@ -40,10 +40,10 @@ private String colName; private String colType; - private long longLowValue; - private long longHighValue; - private double doubleLowValue; - private double doubleHighValue; + private Long longLowValue; + private Long longHighValue; + private Double doubleLowValue; + private Double doubleHighValue; private String decimalLowValue; private String decimalHighValue; private long numNulls; @@ -166,14 +166,14 @@ public void setBooleanStats(long numTrues, long numFalses, long numNulls) { this.numNulls = numNulls; } - public void setLongStats(long numNulls, long numNDVs, long lowValue, long highValue) { + public void setLongStats(long numNulls, long numNDVs, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.longLowValue = lowValue; this.longHighValue = highValue; } - public void setDoubleStats(long numNulls, long numNDVs, double lowValue, double highValue) { + public void setDoubleStats(long numNulls, long numNDVs, Double lowValue, Double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.doubleLowValue = lowValue; @@ -200,7 +200,7 @@ public void setBinaryStats(long numNulls, long maxColLen, double avgColLen) { this.maxColLen = maxColLen; this.avgColLen = avgColLen; } - public long getLongLowValue() { + public Long getLongLowValue() { return longLowValue; } @@ -208,7 +208,7 @@ public void setLongLowValue(long longLowValue) { this.longLowValue = longLowValue; } - public long getLongHighValue() { + public Long getLongHighValue() { return longHighValue; } @@ -216,7 +216,7 @@ public void setLongHighValue(long longHighValue) { this.longHighValue = longHighValue; } - public double getDoubleLowValue() { + public Double getDoubleLowValue() { return doubleLowValue; } @@ -224,7 +224,7 @@ public void setDoubleLowValue(double doubleLowValue) { this.doubleLowValue = doubleLowValue; } - public double getDoubleHighValue() { + public Double getDoubleHighValue() { return doubleHighValue; } diff --git metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java index 85f6427..44bbab5 100644 --- metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java +++ metastore/src/model/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java @@ -38,10 +38,10 @@ private String colName; private String colType; - private long longLowValue; - private long longHighValue; - private double doubleLowValue; - private double doubleHighValue; + private Long longLowValue; + private Long longHighValue; + private Double doubleLowValue; + private Double doubleHighValue; private String decimalLowValue; private String decimalHighValue; private long numNulls; @@ -156,14 +156,14 @@ public void setBooleanStats(long numTrues, long numFalses, long numNulls) { this.numNulls = numNulls; } - public void setLongStats(long numNulls, long numNDVs, long lowValue, long highValue) { + public void setLongStats(long numNulls, long numNDVs, Long lowValue, Long highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.longLowValue = lowValue; this.longHighValue = highValue; } - public void setDoubleStats(long numNulls, long numNDVs, double lowValue, double highValue) { + public void setDoubleStats(long numNulls, long numNDVs, Double lowValue, Double highValue) { this.numNulls = numNulls; this.numDVs = numNDVs; this.doubleLowValue = lowValue; @@ -191,7 +191,7 @@ public void setBinaryStats(long numNulls, long maxColLen, double avgColLen) { this.avgColLen = avgColLen; } - public long getLongLowValue() { + public Long getLongLowValue() { return longLowValue; } @@ -199,7 +199,7 @@ public void setLongLowValue(long longLowValue) { this.longLowValue = longLowValue; } - public long getLongHighValue() { + public Long getLongHighValue() { return longHighValue; } @@ -207,7 +207,7 @@ public void setLongHighValue(long longHighValue) { this.longHighValue = longHighValue; } - public double getDoubleLowValue() { + public Double getDoubleLowValue() { return doubleLowValue; } @@ -215,7 +215,7 @@ public void setDoubleLowValue(double doubleLowValue) { this.doubleLowValue = doubleLowValue; } - public double getDoubleHighValue() { + public Double getDoubleHighValue() { return doubleHighValue; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java index 3dc02f0..47a6871 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java @@ -20,8 +20,6 @@ import java.io.IOException; import java.io.Serializable; -import java.math.BigDecimal; -import java.math.BigInteger; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -188,6 +186,9 @@ private void unpackBinaryStats(ObjectInspector oi, Object o, String fName, private void unpackPrimitiveObject (ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) { + if (o == null) { + return; + } // First infer the type of object if (fieldName.equals("columntype")) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index ee4d56c..68f8fd8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -31,6 +31,8 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.FetchTask; @@ -342,12 +344,14 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( ColumnStatisticsData statData = stats.get(0).getStatsData(); switch (type) { case Integeral: - oneRow.add(statData.getLongStats().getHighValue()); + LongColumnStatsData lstats = statData.getLongStats(); + oneRow.add(lstats.isSetHighValue() ? lstats.getHighValue() : null); ois.add(PrimitiveObjectInspectorFactory. getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG)); break; case Double: - oneRow.add(statData.getDoubleStats().getHighValue()); + DoubleColumnStatsData dstats = statData.getDoubleStats(); + oneRow.add(dstats.isSetHighValue() ? dstats.getHighValue() : null); ois.add(PrimitiveObjectInspectorFactory. getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE)); break; @@ -362,7 +366,7 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( tsOp.getConf().getAlias(), tsOp).getPartitions(); switch (type) { case Integeral: { - long maxVal = Long.MIN_VALUE; + Long maxVal = null; Collection> result = verifyAndGetPartStats(hive, tbl, colName, parts); if (result == null) { @@ -371,8 +375,12 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( for (List statObj : result) { ColumnStatisticsData statData = validateSingleColStat(statObj); if (statData == null) return null; - long curVal = statData.getLongStats().getHighValue(); - maxVal = Math.max(maxVal, curVal); + LongColumnStatsData lstats = statData.getLongStats(); + if (!lstats.isSetHighValue()) { + continue; + } + long curVal = lstats.getHighValue(); + maxVal = maxVal == null ? curVal : Math.max(maxVal, curVal); } oneRow.add(maxVal); ois.add(PrimitiveObjectInspectorFactory. @@ -380,7 +388,7 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( break; } case Double: { - double maxVal = Double.MIN_VALUE; + Double maxVal = null; Collection> result = verifyAndGetPartStats(hive, tbl, colName, parts); if (result == null) { @@ -389,8 +397,12 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( for (List statObj : result) { ColumnStatisticsData statData = validateSingleColStat(statObj); if (statData == null) return null; + DoubleColumnStatsData dstats = statData.getDoubleStats(); + if (!dstats.isSetHighValue()) { + continue; + } double curVal = statData.getDoubleStats().getHighValue(); - maxVal = Math.max(maxVal, curVal); + maxVal = maxVal == null ? curVal : Math.max(maxVal, curVal); } oneRow.add(maxVal); ois.add(PrimitiveObjectInspectorFactory. @@ -418,12 +430,14 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( .get(0).getStatsData(); switch (type) { case Integeral: - oneRow.add(statData.getLongStats().getLowValue()); + LongColumnStatsData lstats = statData.getLongStats(); + oneRow.add(lstats.isSetLowValue() ? lstats.getLowValue() : null); ois.add(PrimitiveObjectInspectorFactory. getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG)); break; case Double: - oneRow.add(statData.getDoubleStats().getLowValue()); + DoubleColumnStatsData dstats = statData.getDoubleStats(); + oneRow.add(dstats.isSetLowValue() ? dstats.getLowValue() : null); ois.add(PrimitiveObjectInspectorFactory. getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE)); break; @@ -436,7 +450,7 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( Set parts = pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp).getPartitions(); switch(type) { case Integeral: { - long minVal = Long.MAX_VALUE; + Long minVal = null; Collection> result = verifyAndGetPartStats(hive, tbl, colName, parts); if (result == null) { @@ -445,8 +459,12 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( for (List statObj : result) { ColumnStatisticsData statData = validateSingleColStat(statObj); if (statData == null) return null; - long curVal = statData.getLongStats().getLowValue(); - minVal = Math.min(minVal, curVal); + LongColumnStatsData lstats = statData.getLongStats(); + if (!lstats.isSetLowValue()) { + continue; + } + long curVal = lstats.getLowValue(); + minVal = minVal == null ? curVal : Math.min(minVal, curVal); } oneRow.add(minVal); ois.add(PrimitiveObjectInspectorFactory. @@ -454,7 +472,7 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( break; } case Double: { - double minVal = Double.MAX_VALUE; + Double minVal = null; Collection> result = verifyAndGetPartStats(hive, tbl, colName, parts); if (result == null) { @@ -463,8 +481,12 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( for (List statObj : result) { ColumnStatisticsData statData = validateSingleColStat(statObj); if (statData == null) return null; + DoubleColumnStatsData dstats = statData.getDoubleStats(); + if (!dstats.isSetLowValue()) { + continue; + } double curVal = statData.getDoubleStats().getLowValue(); - minVal = Math.min(minVal, curVal); + minVal = minVal == null ? curVal : Math.min(minVal, curVal); } oneRow.add(minVal); ois.add(PrimitiveObjectInspectorFactory. diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 3b063eb..4077e1c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.ql.udf.generic; -import java.math.BigDecimal; import java.util.ArrayList; import java.util.List; @@ -30,7 +29,6 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -299,46 +297,49 @@ public Object terminate(AggregationBuffer agg) throws HiveException { } } - /** - * GenericUDAFLongStatsEvaluator. - * - */ - public static class GenericUDAFLongStatsEvaluator extends GenericUDAFEvaluator { + public static abstract class GenericUDAFNumericStatsEvaluator + extends GenericUDAFEvaluator { + + protected final static int MAX_BIT_VECTORS = 1024; /* Object Inspector corresponding to the input parameter. */ - private transient PrimitiveObjectInspector inputOI; - private transient PrimitiveObjectInspector numVectorsOI; - private final static int MAX_BIT_VECTORS = 1024; + protected transient PrimitiveObjectInspector inputOI; + protected transient PrimitiveObjectInspector numVectorsOI; - /* Partial aggregation result returned by TerminatePartial. Partial result is a struct - * containing a long field named "count". - */ - private transient Object[] partialResult; /* Object Inspectors corresponding to the struct returned by TerminatePartial and the long * field within the struct - "count" */ - private transient StructObjectInspector soi; + protected transient StructObjectInspector soi; - private transient StructField minField; - private transient WritableLongObjectInspector minFieldOI; + protected transient StructField minField; + protected transient OI minFieldOI; - private transient StructField maxField; - private transient WritableLongObjectInspector maxFieldOI; + protected transient StructField maxField; + protected transient OI maxFieldOI; - private transient StructField countNullsField; - private transient WritableLongObjectInspector countNullsFieldOI; + protected transient StructField countNullsField; + protected transient WritableLongObjectInspector countNullsFieldOI; - private transient StructField ndvField; - private transient WritableStringObjectInspector ndvFieldOI; + protected transient StructField ndvField; + protected transient WritableStringObjectInspector ndvFieldOI; - private transient StructField numBitVectorsField; - private transient WritableIntObjectInspector numBitVectorsFieldOI; + protected transient StructField numBitVectorsField; + protected transient WritableIntObjectInspector numBitVectorsFieldOI; + + /* Partial aggregation result returned by TerminatePartial. Partial result is a struct + * containing a long field named "count". + */ + protected transient Object[] partialResult; /* Output of final result of the aggregation */ - private transient Object[] result; + protected transient Object[] result; + + protected transient boolean warned; + + protected abstract OI getValueObjectInspector(); @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { @@ -352,10 +353,10 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc soi = (StructObjectInspector) parameters[0]; minField = soi.getStructFieldRef("Min"); - minFieldOI = (WritableLongObjectInspector) minField.getFieldObjectInspector(); + minFieldOI = (OI) minField.getFieldObjectInspector(); maxField = soi.getStructFieldRef("Max"); - maxFieldOI = (WritableLongObjectInspector) maxField.getFieldObjectInspector(); + maxFieldOI = (OI) maxField.getFieldObjectInspector(); countNullsField = soi.getStructFieldRef("CountNulls"); countNullsFieldOI = (WritableLongObjectInspector) countNullsField.getFieldObjectInspector(); @@ -365,15 +366,15 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc numBitVectorsField = soi.getStructFieldRef("NumBitVectors"); numBitVectorsFieldOI = (WritableIntObjectInspector) - numBitVectorsField.getFieldObjectInspector(); + numBitVectorsField.getFieldObjectInspector(); } // initialize output if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { List foi = new ArrayList(); foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(getValueObjectInspector()); + foi.add(getValueObjectInspector()); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); @@ -388,19 +389,17 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc partialResult = new Object[6]; partialResult[0] = new Text(); - partialResult[1] = new LongWritable(0); - partialResult[2] = new LongWritable(0); partialResult[3] = new LongWritable(0); partialResult[4] = new Text(); partialResult[5] = new IntWritable(0); return ObjectInspectorFactory.getStandardStructObjectInspector(fname, - foi); + foi); } else { List foi = new ArrayList(); foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(getValueObjectInspector()); + foi.add(getValueObjectInspector()); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); @@ -413,8 +412,6 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc result = new Object[5]; result[0] = new Text(); - result[1] = new LongWritable(0); - result[2] = new LongWritable(0); result[3] = new LongWritable(0); result[4] = new LongWritable(0); @@ -423,94 +420,88 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc } } + public abstract class NumericStatsAgg extends AbstractAggregationBuffer { - @AggregationType(estimable = true) - public static class LongStatsAgg extends AbstractAggregationBuffer { public String columnType; - public long min; /* Minimum value seen so far */ - public long max; /* Maximum value seen so far */ - public long countNulls; /* Count of number of null values seen so far */ - public LongNumDistinctValueEstimator numDV; /* Distinct value estimator */ - public boolean firstItem; /* First item in the aggBuf? */ - public int numBitVectors; + public V min; /* Minimum value seen so far */ + public V max; /* Maximum value seen so far */ + public long countNulls; /* Count of number of null values seen so far */ + public NumDistinctValueEstimator numDV; /* Distinct value estimator */ + @Override public int estimate() { JavaDataModel model = JavaDataModel.get(); - return model.primitive1() * 2 + model.primitive2() * 3 + - model.lengthFor(columnType) + model.lengthFor(numDV); + return model.lengthFor(columnType) + model.primitive1() + model.primitive2() + + model.lengthFor(numDV); } - }; - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException { - LongStatsAgg result = new LongStatsAgg(); - reset(result); - return result; - } - public void initNDVEstimator(LongStatsAgg aggBuffer, int numBitVectors) { - aggBuffer.numDV = new LongNumDistinctValueEstimator(numBitVectors); - aggBuffer.numDV.reset(); - } + protected void initNDVEstimator(int numBitVectors) { + numDV = new NumDistinctValueEstimator(numBitVectors); + } - @Override - public void reset(AggregationBuffer agg) throws HiveException { - LongStatsAgg myagg = (LongStatsAgg) agg; - myagg.columnType = new String("Long"); - myagg.min = 0; - myagg.max = 0; - myagg.countNulls = 0; - myagg.firstItem = true; - } + protected abstract void update(Object p, PrimitiveObjectInspector inputOI); - boolean warned = false; + protected abstract void updateMin(Object minValue, OI minOI); - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - Object p = parameters[0]; - LongStatsAgg myagg = (LongStatsAgg) agg; - boolean emptyTable = false; + protected abstract void updateMax(Object maxValue, OI maxOI); - if (parameters[1] == null) { - emptyTable = true; + protected Object serialize(Object[] result) { + serializeCommon(result); + long dv = numDV != null ? numDV.estimateNumDistinctValues() : 0; + ((LongWritable) result[4]).set(dv); + + return result; } - if (myagg.firstItem) { - int numVectors = 0; - if (!emptyTable) { - numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI); - } + protected Object serializePartial(Object[] result) { + // Serialize the rest of the values in the AggBuffer + serializeCommon(result); + + // Serialize numDistinctValue Estimator + Text t = numDV.serialize(); + ((Text) result[4]).set(t); + ((IntWritable) result[5]).set(numDV.getnumBitVectors()); + + return result; + } + + private void serializeCommon(Object[] result) { + // Serialize rest of the field in the AggBuffer + ((Text) result[0]).set(columnType); + result[1] = min; + result[2] = max; + ((LongWritable) result[3]).set(countNulls); + } + + public void reset(String type) throws HiveException { + columnType = type; + min = null; + max = null; + countNulls = 0; + numDV = null; + } + }; + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + NumericStatsAgg myagg = (NumericStatsAgg) agg; + + if (myagg.numDV == null) { + int numVectors = parameters[1] == null ? 0 : + PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI); if (numVectors > MAX_BIT_VECTORS) { throw new HiveException("The maximum allowed value for number of bit vectors " + - " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); } - initNDVEstimator(myagg, numVectors); - myagg.firstItem = false; - myagg.numBitVectors = numVectors; + myagg.initNDVEstimator(numVectors); } - if (!emptyTable) { - //Update null counter if a null value is seen - if (p == null) { + if (parameters[0] == null) { myagg.countNulls++; - } - else { + } else { try { - long v = PrimitiveObjectInspectorUtils.getLong(p, inputOI); - - //Update min counter if new value is less than min seen so far - if (v < myagg.min) { - myagg.min = v; - } - - //Update max counter if new value is greater than max seen so far - if (v > myagg.max) { - myagg.max = v; - } - - // Add value to NumDistinctValue Estimator - myagg.numDV.addToEstimator(v); - + myagg.update(parameters[0], inputOI); } catch (NumberFormatException e) { if (!warned) { warned = true; @@ -521,387 +512,175 @@ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveExcep } } } - } } @Override public Object terminatePartial(AggregationBuffer agg) throws HiveException { - LongStatsAgg myagg = (LongStatsAgg) agg; - - // Serialize numDistinctValue Estimator - Text t = myagg.numDV.serialize(); - - // Serialize rest of the field in the AggBuffer - ((Text) partialResult[0]).set(myagg.columnType); - ((LongWritable) partialResult[1]).set(myagg.min); - ((LongWritable) partialResult[2]).set(myagg.max); - ((LongWritable) partialResult[3]).set(myagg.countNulls); - ((Text) partialResult[4]).set(t); - ((IntWritable) partialResult[5]).set(myagg.numDV.getnumBitVectors()); + return ((NumericStatsAgg) agg).serializePartial(partialResult); + } - return partialResult; + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + return ((NumericStatsAgg) agg).serialize(result); } @Override public void merge(AggregationBuffer agg, Object partial) throws HiveException { if (partial != null) { - LongStatsAgg myagg = (LongStatsAgg) agg; + NumericStatsAgg myagg = (NumericStatsAgg) agg; - if (myagg.firstItem) { + if (myagg.numDV == null) { Object partialValue = soi.getStructFieldData(partial, numBitVectorsField); int numVectors = numBitVectorsFieldOI.get(partialValue); - initNDVEstimator(myagg, numVectors); - myagg.firstItem = false; - myagg.numBitVectors = numVectors; - + myagg.initNDVEstimator(numVectors); } // Update min if min is lesser than the smallest value seen so far - Object partialValue = soi.getStructFieldData(partial, minField); - if (myagg.min > minFieldOI.get(partialValue)) { - myagg.min = minFieldOI.get(partialValue); - } + Object minValue = soi.getStructFieldData(partial, minField); + myagg.updateMin(minValue, minFieldOI); // Update max if max is greater than the largest value seen so far - partialValue = soi.getStructFieldData(partial, maxField); - if (myagg.max < maxFieldOI.get(partialValue)) { - myagg.max = maxFieldOI.get(partialValue); - } + Object maxValue = soi.getStructFieldData(partial, maxField); + myagg.updateMax(maxValue, maxFieldOI); // Update the null counter - partialValue = soi.getStructFieldData(partial, countNullsField); - myagg.countNulls += countNullsFieldOI.get(partialValue); + Object countNull = soi.getStructFieldData(partial, countNullsField); + myagg.countNulls += countNullsFieldOI.get(countNull); // Merge numDistinctValue Estimators - partialValue = soi.getStructFieldData(partial, ndvField); - String v = ndvFieldOI.getPrimitiveJavaObject(partialValue); - NumDistinctValueEstimator o = new NumDistinctValueEstimator(v, myagg.numBitVectors); + Object numDistinct = soi.getStructFieldData(partial, ndvField); + String v = ndvFieldOI.getPrimitiveJavaObject(numDistinct); + NumDistinctValueEstimator o = + new NumDistinctValueEstimator(v, myagg.numDV.getnumBitVectors()); myagg.numDV.mergeEstimators(o); } } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException { - LongStatsAgg myagg = (LongStatsAgg) agg; - - long numDV = 0; - if (myagg.numBitVectors != 0) { - numDV = myagg.numDV.estimateNumDistinctValues(); - } - - // Serialize the result struct - ((Text) result[0]).set(myagg.columnType); - ((LongWritable) result[1]).set(myagg.min); - ((LongWritable) result[2]).set(myagg.max); - ((LongWritable) result[3]).set(myagg.countNulls); - ((LongWritable) result[4]).set(numDV); - - return result; - } } /** - * GenericUDAFDoubleStatsEvaluator. + * GenericUDAFLongStatsEvaluator. * */ - public static class GenericUDAFDoubleStatsEvaluator extends GenericUDAFEvaluator { - - /* Object Inspector corresponding to the input parameter. - */ - private transient PrimitiveObjectInspector inputOI; - private transient PrimitiveObjectInspector numVectorsOI; - private final static int MAX_BIT_VECTORS = 1024; - - /* Partial aggregation result returned by TerminatePartial. Partial result is a struct - * containing a long field named "count". - */ - private transient Object[] partialResult; - - /* Object Inspectors corresponding to the struct returned by TerminatePartial and the long - * field within the struct - "count" - */ - private transient StructObjectInspector soi; - - private transient StructField minField; - private transient WritableDoubleObjectInspector minFieldOI; - - private transient StructField maxField; - private transient WritableDoubleObjectInspector maxFieldOI; - - private transient StructField countNullsField; - private transient WritableLongObjectInspector countNullsFieldOI; - - private transient StructField ndvField; - private transient WritableStringObjectInspector ndvFieldOI; - - private transient StructField numBitVectorsField; - private transient WritableIntObjectInspector numBitVectorsFieldOI; - - /* Output of final result of the aggregation - */ - private transient Object[] result; + public static class GenericUDAFLongStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { - super.init(m, parameters); - - // initialize input - if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { - inputOI = (PrimitiveObjectInspector) parameters[0]; - numVectorsOI = (PrimitiveObjectInspector) parameters[1]; - } else { - soi = (StructObjectInspector) parameters[0]; - - minField = soi.getStructFieldRef("Min"); - minFieldOI = (WritableDoubleObjectInspector) minField.getFieldObjectInspector(); - - maxField = soi.getStructFieldRef("Max"); - maxFieldOI = (WritableDoubleObjectInspector) maxField.getFieldObjectInspector(); - - countNullsField = soi.getStructFieldRef("CountNulls"); - countNullsFieldOI = (WritableLongObjectInspector) countNullsField.getFieldObjectInspector(); - - ndvField = soi.getStructFieldRef("BitVector"); - ndvFieldOI = (WritableStringObjectInspector) ndvField.getFieldObjectInspector(); - - numBitVectorsField = soi.getStructFieldRef("NumBitVectors"); - numBitVectorsFieldOI = (WritableIntObjectInspector) - numBitVectorsField.getFieldObjectInspector(); - } - - // initialize output - if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { - List foi = new ArrayList(); - foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); - - List fname = new ArrayList(); - fname.add("ColumnType"); - fname.add("Min"); - fname.add("Max"); - fname.add("CountNulls"); - fname.add("BitVector"); - fname.add("NumBitVectors"); - - partialResult = new Object[6]; - partialResult[0] = new Text(); - partialResult[1] = new DoubleWritable(0); - partialResult[2] = new DoubleWritable(0); - partialResult[3] = new LongWritable(0); - partialResult[4] = new Text(); - partialResult[5] = new IntWritable(0); - - return ObjectInspectorFactory.getStandardStructObjectInspector(fname, - foi); - } else { - List foi = new ArrayList(); - foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - - List fname = new ArrayList(); - fname.add("ColumnType"); - fname.add("Min"); - fname.add("Max"); - fname.add("CountNulls"); - fname.add("NumDistinctValues"); - - result = new Object[5]; - result[0] = new Text(); - result[1] = new DoubleWritable(0); - result[2] = new DoubleWritable(0); - result[3] = new LongWritable(0); - result[4] = new LongWritable(0); - - return ObjectInspectorFactory.getStandardStructObjectInspector(fname, - foi); - } + protected LongObjectInspector getValueObjectInspector() { + return PrimitiveObjectInspectorFactory.javaLongObjectInspector; } @AggregationType(estimable = true) - public static class DoubleStatsAgg extends AbstractAggregationBuffer { - public String columnType; - public double min; /* Minimum value seen so far */ - public double max; /* Maximum value seen so far */ - public long countNulls; /* Count of number of null values seen so far */ - public DoubleNumDistinctValueEstimator numDV; /* Distinct value estimator */ - public boolean firstItem; /* First item in the aggBuf? */ - public int numBitVectors; + public class LongStatsAgg extends NumericStatsAgg { @Override public int estimate() { JavaDataModel model = JavaDataModel.get(); - return model.primitive1() * 2 + model.primitive2() * 3 + - model.lengthFor(columnType) + model.lengthFor(numDV); + return super.estimate() + model.primitive2() * 2; } - }; - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException { - DoubleStatsAgg result = new DoubleStatsAgg(); - reset(result); - return result; - } - - public void initNDVEstimator(DoubleStatsAgg aggBuffer, int numBitVectors) { - aggBuffer.numDV = new DoubleNumDistinctValueEstimator(numBitVectors); - aggBuffer.numDV.reset(); - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException { - DoubleStatsAgg myagg = (DoubleStatsAgg) agg; - myagg.columnType = new String("Double"); - myagg.min = 0.0; - myagg.max = 0.0; - myagg.countNulls = 0; - myagg.firstItem = true; - } - - boolean warned = false; - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - Object p = parameters[0]; - DoubleStatsAgg myagg = (DoubleStatsAgg) agg; - boolean emptyTable = false; - - if (parameters[1] == null) { - emptyTable = true; - } - - if (myagg.firstItem) { - int numVectors = 0; - if (!emptyTable) { - numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI); + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + long v = PrimitiveObjectInspectorUtils.getLong(p, inputOI); + //Update min counter if new value is less than min seen so far + if (min == null || v < min) { + min = v; } - - if (numVectors > MAX_BIT_VECTORS) { - throw new HiveException("The maximum allowed value for number of bit vectors " + - " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + //Update max counter if new value is greater than max seen so far + if (max == null || v > max) { + max = v; } - - initNDVEstimator(myagg, numVectors); - myagg.firstItem = false; - myagg.numBitVectors = numVectors; + // Add value to NumDistinctValue Estimator + numDV.addToEstimator(v); } - if (!emptyTable) { - - //Update null counter if a null value is seen - if (p == null) { - myagg.countNulls++; + @Override + protected void updateMin(Object minValue, LongObjectInspector minFieldOI) { + if (min == null || (minValue != null && min > minFieldOI.get(minValue))) { + min = minFieldOI.get(minValue); } - else { - try { - - double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI); - - //Update min counter if new value is less than min seen so far - if (v < myagg.min) { - myagg.min = v; - } - - //Update max counter if new value is greater than max seen so far - if (v > myagg.max) { - myagg.max = v; - } - - // Add value to NumDistinctValue Estimator - myagg.numDV.addToEstimator(v); + } - } catch (NumberFormatException e) { - if (!warned) { - warned = true; - LOG.warn(getClass().getSimpleName() + " " - + StringUtils.stringifyException(e)); - LOG.warn(getClass().getSimpleName() - + " ignoring similar exceptions."); - } - } + @Override + protected void updateMax(Object maxValue, LongObjectInspector maxFieldOI) { + if (max == null || (maxValue != null && max < maxFieldOI.get(maxValue))) { + max = maxFieldOI.get(maxValue); } } - } + }; @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException { - DoubleStatsAgg myagg = (DoubleStatsAgg) agg; - - // Serialize numDistinctValue Estimator - Text t = myagg.numDV.serialize(); - - // Serialize the rest of the values in the AggBuffer - ((Text) partialResult[0]).set(myagg.columnType); - ((DoubleWritable) partialResult[1]).set(myagg.min); - ((DoubleWritable) partialResult[2]).set(myagg.max); - ((LongWritable) partialResult[3]).set(myagg.countNulls); - ((Text) partialResult[4]).set(t); - ((IntWritable) partialResult[5]).set(myagg.numBitVectors); - - return partialResult; + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new LongStatsAgg(); + reset(result); + return result; } @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { - if (partial != null) { - DoubleStatsAgg myagg = (DoubleStatsAgg) agg; - - if (myagg.firstItem) { - Object partialValue = soi.getStructFieldData(partial, numBitVectorsField); - int numVectors = numBitVectorsFieldOI.get(partialValue); - initNDVEstimator(myagg, numVectors); - myagg.firstItem = false; - myagg.numBitVectors = numVectors; - } - - // Update min if min is lesser than the smallest value seen so far - Object partialValue = soi.getStructFieldData(partial, minField); - if (myagg.min > minFieldOI.get(partialValue)) { - myagg.min = minFieldOI.get(partialValue); - } - - // Update max if max is greater than the largest value seen so far - partialValue = soi.getStructFieldData(partial, maxField); - if (myagg.max < maxFieldOI.get(partialValue)) { - myagg.max = maxFieldOI.get(partialValue); - } + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset("Long"); + } + } - // Update the null counter - partialValue = soi.getStructFieldData(partial, countNullsField); - myagg.countNulls += countNullsFieldOI.get(partialValue); + /** + * GenericUDAFDoubleStatsEvaluator. + * + */ + public static class GenericUDAFDoubleStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { - // Merge numDistinctValue Estimators - partialValue = soi.getStructFieldData(partial, ndvField); - String v = ndvFieldOI.getPrimitiveJavaObject(partialValue); + @Override + protected DoubleObjectInspector getValueObjectInspector() { + return PrimitiveObjectInspectorFactory.javaDoubleObjectInspector; + } - NumDistinctValueEstimator o = new NumDistinctValueEstimator(v, myagg.numBitVectors); - myagg.numDV.mergeEstimators(o); + @AggregationType(estimable = true) + public class DoubleStatsAgg extends NumericStatsAgg { + @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; } - } - @Override - public Object terminate(AggregationBuffer agg) throws HiveException { - DoubleStatsAgg myagg = (DoubleStatsAgg) agg; - long numDV = 0; + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI); + //Update min counter if new value is less than min seen so far + if (min == null || v < min) { + min = v; + } + //Update max counter if new value is greater than max seen so far + if (max == null || v > max) { + max = v; + } + // Add value to NumDistinctValue Estimator + numDV.addToEstimator(v); + } - if (myagg.numBitVectors != 0) { - numDV = myagg.numDV.estimateNumDistinctValues(); + @Override + protected void updateMin(Object minValue, DoubleObjectInspector minFieldOI) { + if (min == null || (minValue != null && min > minFieldOI.get(minValue))) { + min = minFieldOI.get(minValue); + } } - // Serialize the result struct - ((Text) result[0]).set(myagg.columnType); - ((DoubleWritable) result[1]).set(myagg.min); - ((DoubleWritable) result[2]).set(myagg.max); - ((LongWritable) result[3]).set(myagg.countNulls); - ((LongWritable) result[4]).set(numDV); + @Override + protected void updateMax(Object maxValue, DoubleObjectInspector maxFieldOI) { + if (max == null || (maxValue != null && max < maxFieldOI.get(maxValue))) { + max = maxFieldOI.get(maxValue); + } + } + }; + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DoubleStatsAgg(); + reset(result); return result; } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset("Double"); + } } /** @@ -1475,304 +1254,64 @@ public Object terminate(AggregationBuffer agg) throws HiveException { } } - public static class GenericUDAFDecimalStatsEvaluator extends GenericUDAFEvaluator { - - /* - * Object Inspector corresponding to the input parameter. - */ - private transient PrimitiveObjectInspector inputOI; - private transient PrimitiveObjectInspector numVectorsOI; - private final static int MAX_BIT_VECTORS = 1024; - - /* Partial aggregation result returned by TerminatePartial. Partial result is a struct - * containing a long field named "count". - */ - private transient Object[] partialResult; - - /* Object Inspectors corresponding to the struct returned by TerminatePartial and the long - * field within the struct - "count" - */ - private transient StructObjectInspector soi; - - private transient StructField minField; - private transient WritableHiveDecimalObjectInspector minFieldOI; - - private transient StructField maxField; - private transient WritableHiveDecimalObjectInspector maxFieldOI; - - private transient StructField countNullsField; - private transient WritableLongObjectInspector countNullsFieldOI; - - private transient StructField ndvField; - private transient WritableStringObjectInspector ndvFieldOI; - - private transient StructField numBitVectorsField; - private transient WritableIntObjectInspector numBitVectorsFieldOI; - - /* Output of final result of the aggregation - */ - private transient Object[] result; - - private boolean warned = false; + public static class GenericUDAFDecimalStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { - super.init(m, parameters); - - // initialize input - if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { - inputOI = (PrimitiveObjectInspector) parameters[0]; - numVectorsOI = (PrimitiveObjectInspector) parameters[1]; - } else { - soi = (StructObjectInspector) parameters[0]; - - minField = soi.getStructFieldRef("Min"); - minFieldOI = (WritableHiveDecimalObjectInspector) minField.getFieldObjectInspector(); - - maxField = soi.getStructFieldRef("Max"); - maxFieldOI = (WritableHiveDecimalObjectInspector) maxField.getFieldObjectInspector(); - - countNullsField = soi.getStructFieldRef("CountNulls"); - countNullsFieldOI = (WritableLongObjectInspector) countNullsField.getFieldObjectInspector(); - - ndvField = soi.getStructFieldRef("BitVector"); - ndvFieldOI = (WritableStringObjectInspector) ndvField.getFieldObjectInspector(); - - numBitVectorsField = soi.getStructFieldRef("NumBitVectors"); - numBitVectorsFieldOI = (WritableIntObjectInspector) - numBitVectorsField.getFieldObjectInspector(); - } - - // initialize output - if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { - List foi = new ArrayList(); - foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector); - - List fname = new ArrayList(); - fname.add("ColumnType"); - fname.add("Min"); - fname.add("Max"); - fname.add("CountNulls"); - fname.add("BitVector"); - fname.add("NumBitVectors"); - - partialResult = new Object[6]; - partialResult[0] = new Text(); - partialResult[1] = new HiveDecimalWritable(HiveDecimal.create(0)); - partialResult[2] = new HiveDecimalWritable(HiveDecimal.create(0)); - partialResult[3] = new LongWritable(0); - partialResult[4] = new Text(); - partialResult[5] = new IntWritable(0); - - return ObjectInspectorFactory.getStandardStructObjectInspector(fname, - foi); - } else { - List foi = new ArrayList(); - foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - - List fname = new ArrayList(); - fname.add("ColumnType"); - fname.add("Min"); - fname.add("Max"); - fname.add("CountNulls"); - fname.add("NumDistinctValues"); - - result = new Object[5]; - result[0] = new Text(); - result[1] = new HiveDecimalWritable(HiveDecimal.create(0)); - result[2] = new HiveDecimalWritable(HiveDecimal.create(0)); - result[3] = new LongWritable(0); - result[4] = new LongWritable(0); - - return ObjectInspectorFactory.getStandardStructObjectInspector(fname, - foi); - } + protected HiveDecimalObjectInspector getValueObjectInspector() { + return PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector; } @AggregationType(estimable = true) - public static class DecimalStatsAgg extends AbstractAggregationBuffer { - public String columnType; - public HiveDecimal min; /* Minimum value seen so far */ - public HiveDecimal max; /* Maximum value seen so far */ - public long countNulls; /* Count of number of null values seen so far */ - public DecimalNumDistinctValueEstimator numDV; /* Distinct value estimator */ - public boolean firstItem; /* First item in the aggBuf? */ - public int numBitVectors; + public class DecimalStatsAgg extends NumericStatsAgg { @Override public int estimate() { JavaDataModel model = JavaDataModel.get(); - return model.primitive1() * 2 + model.primitive2() + model.lengthOfDecimal() * 2 + - model.lengthFor(columnType) + model.lengthFor(numDV); - } - }; - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException { - DecimalStatsAgg result = new DecimalStatsAgg(); - reset(result); - return result; - } - - public void initNDVEstimator(DecimalStatsAgg aggBuffer, int numBitVectors) { - aggBuffer.numDV = new DecimalNumDistinctValueEstimator(numBitVectors); - aggBuffer.numDV.reset(); - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException { - DecimalStatsAgg myagg = (DecimalStatsAgg) agg; - myagg.columnType = new String("Decimal"); - myagg.min = HiveDecimal.create(0); - myagg.max = HiveDecimal.create(0); - myagg.countNulls = 0; - myagg.firstItem = true; - } - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - Object p = parameters[0]; - DecimalStatsAgg myagg = (DecimalStatsAgg) agg; - boolean emptyTable = false; - - if (parameters[1] == null) { - emptyTable = true; + return super.estimate() + model.lengthOfDecimal() * 2; } - if (myagg.firstItem) { - int numVectors = 0; - if (!emptyTable) { - numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI); + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + HiveDecimal v = PrimitiveObjectInspectorUtils.getHiveDecimal(p, inputOI); + //Update min counter if new value is less than min seen so far + if (min == null || v.compareTo(min) < 0) { + min = v; } - - if (numVectors > MAX_BIT_VECTORS) { - throw new HiveException("The maximum allowed value for number of bit vectors " + - " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + //Update max counter if new value is greater than max seen so far + if (max == null || v.compareTo(max) > 0) { + max = v; } - - initNDVEstimator(myagg, numVectors); - myagg.firstItem = false; - myagg.numBitVectors = numVectors; + // Add value to NumDistinctValue Estimator + numDV.addToEstimator(v); } - if (!emptyTable) { - - //Update null counter if a null value is seen - if (p == null) { - myagg.countNulls++; + @Override + protected void updateMin(Object minValue, HiveDecimalObjectInspector minFieldOI) { + if (min == null || (minValue != null && + min.compareTo(minFieldOI.getPrimitiveJavaObject(minValue)) > 0)) { + min = minFieldOI.getPrimitiveJavaObject(minValue); } - else { - try { - - HiveDecimal v = PrimitiveObjectInspectorUtils.getHiveDecimal(p, inputOI); - - //Update min counter if new value is less than min seen so far - if (v.compareTo(myagg.min) < 0) { - myagg.min = v; - } - - //Update max counter if new value is greater than max seen so far - if (v.compareTo(myagg.max) > 0) { - myagg.max = v; - } - - // Add value to NumDistinctValue Estimator - myagg.numDV.addToEstimator(v); + } - } catch (NumberFormatException e) { - if (!warned) { - warned = true; - LOG.warn(getClass().getSimpleName() + " " - + StringUtils.stringifyException(e)); - LOG.warn(getClass().getSimpleName() - + " ignoring similar exceptions."); - } - } + @Override + protected void updateMax(Object maxValue, HiveDecimalObjectInspector maxFieldOI) { + if (max == null || (maxValue != null && + max.compareTo(maxFieldOI.getPrimitiveJavaObject(maxValue)) < 0)) { + max = maxFieldOI.getPrimitiveJavaObject(maxValue); } } - } - - @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException { - DecimalStatsAgg myagg = (DecimalStatsAgg) agg; - - // Serialize numDistinctValue Estimator - Text t = myagg.numDV.serialize(); - - // Serialize the rest of the values in the AggBuffer - ((Text) partialResult[0]).set(myagg.columnType); - ((HiveDecimalWritable) partialResult[1]).set(myagg.min); - ((HiveDecimalWritable) partialResult[2]).set(myagg.max); - ((LongWritable) partialResult[3]).set(myagg.countNulls); - ((Text) partialResult[4]).set(t); - ((IntWritable) partialResult[5]).set(myagg.numBitVectors); - - return partialResult; - } + }; @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { - if (partial != null) { - DecimalStatsAgg myagg = (DecimalStatsAgg) agg; - - if (myagg.firstItem) { - Object partialValue = soi.getStructFieldData(partial, numBitVectorsField); - int numVectors = numBitVectorsFieldOI.get(partialValue); - initNDVEstimator(myagg, numVectors); - myagg.firstItem = false; - myagg.numBitVectors = numVectors; - } - - // Update min if min is lesser than the smallest value seen so far - Object partialValue = soi.getStructFieldData(partial, minField); - if (myagg.min.compareTo(minFieldOI.getPrimitiveJavaObject(partialValue)) > 0) { - myagg.min = minFieldOI.getPrimitiveJavaObject(partialValue); - } - - // Update max if max is greater than the largest value seen so far - partialValue = soi.getStructFieldData(partial, maxField); - if (myagg.max.compareTo(maxFieldOI.getPrimitiveJavaObject(partialValue)) < 0) { - myagg.max = maxFieldOI.getPrimitiveJavaObject(partialValue); - } - - // Update the null counter - partialValue = soi.getStructFieldData(partial, countNullsField); - myagg.countNulls += countNullsFieldOI.get(partialValue); - - // Merge numDistinctValue Estimators - partialValue = soi.getStructFieldData(partial, ndvField); - String v = ndvFieldOI.getPrimitiveJavaObject(partialValue); - - NumDistinctValueEstimator o = new NumDistinctValueEstimator(v, myagg.numBitVectors); - myagg.numDV.mergeEstimators(o); - } + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new DecimalStatsAgg(); + reset(result); + return result; } @Override - public Object terminate(AggregationBuffer agg) throws HiveException { - DecimalStatsAgg myagg = (DecimalStatsAgg) agg; - long numDV = 0; - - if (myagg.numBitVectors != 0) { - numDV = myagg.numDV.estimateNumDistinctValues(); - } - - // Serialize the result struct - ((Text) result[0]).set(myagg.columnType); - ((HiveDecimalWritable) result[1]).set(myagg.min); - ((HiveDecimalWritable) result[2]).set(myagg.max); - ((LongWritable) result[3]).set(myagg.countNulls); - ((LongWritable) result[4]).set(numDV); - - return result; + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset("Decimal"); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java index 24159b8..2817044 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java @@ -22,6 +22,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.io.Text; public class NumDistinctValueEstimator { @@ -287,6 +288,25 @@ public void addToEstimatorPCSA(long v) { bitVector[hash%numBitVectors].set(index); } + public void addToEstimator(double d) { + int v = new Double(d).hashCode(); + addToEstimator(v); + } + + public void addToEstimatorPCSA(double d) { + int v = new Double(d).hashCode(); + addToEstimatorPCSA(v); + } + + public void addToEstimator(HiveDecimal decimal) { + int v = decimal.hashCode(); + addToEstimator(v); + } + + public void addToEstimatorPCSA(HiveDecimal decimal) { + int v = decimal.hashCode(); + addToEstimatorPCSA(v); + } public void mergeEstimators(NumDistinctValueEstimator o) { // Bitwise OR the bitvector with the bitvector in the agg buffer diff --git ql/src/test/queries/clientpositive/metadata_only_queries.q ql/src/test/queries/clientpositive/metadata_only_queries.q index b549a56..c7ae739 100644 --- ql/src/test/queries/clientpositive/metadata_only_queries.q +++ ql/src/test/queries/clientpositive/metadata_only_queries.q @@ -27,7 +27,7 @@ create table stats_tbl( d double, bo boolean, s string, - ts timestamp, + ts timestamp, dec decimal, bin binary); @@ -40,7 +40,7 @@ create table stats_tbl_part( d double, bo boolean, s string, - ts timestamp, + ts timestamp, dec decimal, bin binary) partitioned by (dt string); @@ -62,11 +62,18 @@ analyze table stats_tbl_part partition(dt='2011') compute statistics for columns analyze table stats_tbl_part partition(dt='2012') compute statistics for columns t,si,i,b,f,d,bo,s,bin; explain -select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl; -select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl; +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl; +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl; +explain +select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl; +select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl; + explain -select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part; -select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part; +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part; +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part; +explain +select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part; +select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part; explain select count(ts) from stats_tbl_part; diff --git ql/src/test/results/clientpositive/compute_stats_empty_table.q.out ql/src/test/results/clientpositive/compute_stats_empty_table.q.out index 50d6c8d..7e75469 100644 --- ql/src/test/results/clientpositive/compute_stats_empty_table.q.out +++ ql/src/test/results/clientpositive/compute_stats_empty_table.q.out @@ -33,7 +33,7 @@ POSTHOOK: query: select compute_stats(b, 16) from tab_empty POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_empty #### A masked pattern was here #### -{"columntype":"Long","min":0,"max":0,"countnulls":0,"numdistinctvalues":0} +{"columntype":"Long","min":null,"max":null,"countnulls":0,"numdistinctvalues":0} PREHOOK: query: select compute_stats(c, 16) from tab_empty PREHOOK: type: QUERY PREHOOK: Input: default@tab_empty @@ -42,7 +42,7 @@ POSTHOOK: query: select compute_stats(c, 16) from tab_empty POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_empty #### A masked pattern was here #### -{"columntype":"Double","min":0.0,"max":0.0,"countnulls":0,"numdistinctvalues":0} +{"columntype":"Double","min":null,"max":null,"countnulls":0,"numdistinctvalues":0} PREHOOK: query: select compute_stats(d, 16) from tab_empty PREHOOK: type: QUERY PREHOOK: Input: default@tab_empty diff --git ql/src/test/results/clientpositive/compute_stats_long.q.out ql/src/test/results/clientpositive/compute_stats_long.q.out index 2f5cbdd..1727a8e 100644 --- ql/src/test/results/clientpositive/compute_stats_long.q.out +++ ql/src/test/results/clientpositive/compute_stats_long.q.out @@ -34,4 +34,4 @@ select compute_stats(a, 16) from tab_int POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_int #### A masked pattern was here #### -{"columntype":"Long","min":0,"max":344,"countnulls":1,"numdistinctvalues":11} +{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11} diff --git ql/src/test/results/clientpositive/metadata_only_queries.q.out ql/src/test/results/clientpositive/metadata_only_queries.q.out index 950b4a0..a52bcee 100644 --- ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -48,7 +48,7 @@ PREHOOK: query: create table stats_tbl( d double, bo boolean, s string, - ts timestamp, + ts timestamp, dec decimal, bin binary) PREHOOK: type: CREATETABLE @@ -62,7 +62,7 @@ POSTHOOK: query: create table stats_tbl( d double, bo boolean, s string, - ts timestamp, + ts timestamp, dec decimal, bin binary) POSTHOOK: type: CREATETABLE @@ -77,7 +77,7 @@ PREHOOK: query: create table stats_tbl_part( d double, bo boolean, s string, - ts timestamp, + ts timestamp, dec decimal, bin binary) partitioned by (dt string) PREHOOK: type: CREATETABLE @@ -91,7 +91,7 @@ POSTHOOK: query: create table stats_tbl_part( d double, bo boolean, s string, - ts timestamp, + ts timestamp, dec decimal, bin binary) partitioned by (dt string) POSTHOOK: type: CREATETABLE @@ -320,10 +320,10 @@ POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### PREHOOK: query: explain -select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl PREHOOK: type: QUERY POSTHOOK: query: explain -select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage @@ -335,18 +335,64 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl +PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl PREHOOK: type: QUERY #### A masked pattern was here #### -POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl +POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl POSTHOOK: type: QUERY #### A masked pattern was here #### -9999 9999 1999.8 9999 9999 9999 9999 9999 65791 0 99.9800033569336 0.0 +9999 9999 1999.8 9999 9999 9999 9999 9999 +PREHOOK: query: explain +select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl +PREHOOK: type: QUERY +POSTHOOK: query: explain +select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl +PREHOOK: type: QUERY +#### A masked pattern was here #### +POSTHOOK: query: select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl +POSTHOOK: type: QUERY +#### A masked pattern was here #### +65536 65791 4294967296 4294967551 0.009999999776482582 99.9800033569336 0.01 50.0 PREHOOK: query: explain -select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part PREHOOK: type: QUERY POSTHOOK: query: explain -select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part +PREHOOK: type: QUERY +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl_part +POSTHOOK: type: QUERY +#### A masked pattern was here #### +9489 9489 1897.8 9489 9489 9489 9489 9489 +PREHOOK: query: explain +select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part +PREHOOK: type: QUERY +POSTHOOK: query: explain +select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage @@ -358,13 +404,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part +PREHOOK: query: select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part PREHOOK: type: QUERY #### A masked pattern was here #### -POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part +POSTHOOK: query: select min(i), max(i), min(b), max(b), min(f), max(f), min(d), max(d) from stats_tbl_part POSTHOOK: type: QUERY #### A masked pattern was here #### -9489 9489 1897.8 9489 9489 9489 9489 9489 65791 0 99.9800033569336 0.0 +65536 65791 4294967296 4294967551 0.009999999776482582 99.9800033569336 0.01 50.0 PREHOOK: query: explain select count(ts) from stats_tbl_part PREHOOK: type: QUERY POSTHOOK: query: explain select count(ts) from stats_tbl_part diff --git ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out index 451da09..62c9cc3 100644 --- ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out +++ ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out @@ -162,7 +162,7 @@ PREHOOK: type: QUERY POSTHOOK: query: select count(*), count(1), sum(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt = 2010 POSTHOOK: type: QUERY #### A masked pattern was here #### -2322 2322 2322 2322 2322 2322 2322 65791 0 99.9800033569336 0.0 +2322 2322 2322 2322 2322 2322 2322 65791 4294967296 99.9800033569336 0.03 PREHOOK: query: explain select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010 PREHOOK: type: QUERY @@ -185,7 +185,7 @@ PREHOOK: type: QUERY POSTHOOK: query: select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt > 2010 POSTHOOK: type: QUERY #### A masked pattern was here #### -2219 2219 2219 4438 2219 2219 2219 2219 65791 0 99.95999908447266 0.0 +2219 2219 2219 4438 2219 2219 2219 2219 65791 4294967296 99.95999908447266 0.04 PREHOOK: query: drop table stats_tbl_part PREHOOK: type: DROPTABLE PREHOOK: Input: default@stats_tbl_part