diff --git a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java index 9193f80..7c9d72f 100644 --- a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java @@ -21,9 +21,13 @@ import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * HiveStatsUtils. @@ -32,6 +36,7 @@ */ public class HiveStatsUtils { + private static final Logger LOG = LoggerFactory.getLogger(HiveStatsUtils.class); /** * Get all file status from a root path and recursively go deep into certain levels. @@ -73,4 +78,58 @@ return fs.globStatus(pathPattern, FileUtils.HIDDEN_FILES_PATH_FILTER); } + public static int getNumBitVectorsForNDVEstimation(Configuration conf) throws Exception { + int numBitVectors; + float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR); + + if (percentageError < 0.0) { + throw new Exception("hive.stats.ndv.error can't be negative"); + } else if (percentageError <= 2.4) { + numBitVectors = 1024; + LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%"); + LOG.info("Choosing 1024 bit vectors.."); + } else if (percentageError <= 3.4 ) { + numBitVectors = 1024; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 1024 bit vectors.."); + } else if (percentageError <= 4.8) { + numBitVectors = 512; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 512 bit vectors.."); + } else if (percentageError <= 6.8) { + numBitVectors = 256; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 256 bit vectors.."); + } else if (percentageError <= 9.7) { + numBitVectors = 128; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 128 bit vectors.."); + } else if (percentageError <= 13.8) { + numBitVectors = 64; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 64 bit vectors.."); + } else if (percentageError <= 19.6) { + numBitVectors = 32; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 32 bit vectors.."); + } else if (percentageError <= 28.2) { + numBitVectors = 16; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 16 bit vectors.."); + } else if (percentageError <= 40.9) { + numBitVectors = 8; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 8 bit vectors.."); + } else if (percentageError <= 61.0) { + numBitVectors = 4; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 4 bit vectors.."); + } else { + numBitVectors = 2; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 2 bit vectors.."); + } + return numBitVectors; + } + } diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift index 81837e6..9d8c092 100755 --- a/metastore/if/hive_metastore.thrift +++ b/metastore/if/hive_metastore.thrift @@ -325,34 +325,39 @@ struct Index { struct BooleanColumnStatsData { 1: required i64 numTrues, 2: required i64 numFalses, -3: required i64 numNulls +3: required i64 numNulls, +4: optional string bitVectors } struct 
DoubleColumnStatsData { 1: optional double lowValue, 2: optional double highValue, 3: required i64 numNulls, -4: required i64 numDVs +4: required i64 numDVs, +5: optional string bitVectors } struct LongColumnStatsData { 1: optional i64 lowValue, 2: optional i64 highValue, 3: required i64 numNulls, -4: required i64 numDVs +4: required i64 numDVs, +5: optional string bitVectors } struct StringColumnStatsData { 1: required i64 maxColLen, 2: required double avgColLen, 3: required i64 numNulls, -4: required i64 numDVs +4: required i64 numDVs, +5: optional string bitVectors } struct BinaryColumnStatsData { 1: required i64 maxColLen, 2: required double avgColLen, -3: required i64 numNulls +3: required i64 numNulls, +4: optional string bitVectors } @@ -365,7 +370,8 @@ struct DecimalColumnStatsData { 1: optional Decimal lowValue, 2: optional Decimal highValue, 3: required i64 numNulls, -4: required i64 numDVs +4: required i64 numDVs, +5: optional string bitVectors } struct Date { @@ -376,7 +382,8 @@ struct DateColumnStatsData { 1: optional Date lowValue, 2: optional Date highValue, 3: required i64 numNulls, -4: required i64 numDVs +4: required i64 numDVs, +5: optional string bitVectors } union ColumnStatisticsData { diff --git a/metastore/pom.xml b/metastore/pom.xml index a8e84a1..18c1f9c 100644 --- a/metastore/pom.xml +++ b/metastore/pom.xml @@ -44,6 +44,11 @@ hive-shims ${project.version} + + javolution + javolution + ${javolution.version} + com.google.guava diff --git a/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java b/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java index 39a7278..3b2d7b5 100644 --- a/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java +++ b/metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java @@ -3918,6 +3918,21 @@ public Builder clearMaxCacheEntryLife() { */ com.google.protobuf.ByteString getColumnNameBytes(); + + // optional string bit_vectors = 12; + /** + * optional string bit_vectors = 12; + */ + boolean hasBitVectors(); + /** + * optional string bit_vectors = 12; + */ + java.lang.String getBitVectors(); + /** + * optional string bit_vectors = 12; + */ + com.google.protobuf.ByteString + getBitVectorsBytes(); } /** * Protobuf type {@code org.apache.hadoop.hive.metastore.hbase.ColumnStats} @@ -4073,6 +4088,11 @@ private ColumnStats( columnName_ = input.readBytes(); break; } + case 98: { + bitField0_ |= 0x00000800; + bitVectors_ = input.readBytes(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -7506,6 +7526,49 @@ public boolean hasColumnName() { } } + // optional string bit_vectors = 12; + public static final int BIT_VECTORS_FIELD_NUMBER = 12; + private java.lang.Object bitVectors_; + /** + * optional string bit_vectors = 12; + */ + public boolean hasBitVectors() { + return ((bitField0_ & 0x00000800) == 0x00000800); + } + /** + * optional string bit_vectors = 12; + */ + public java.lang.String getBitVectors() { + java.lang.Object ref = bitVectors_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + bitVectors_ = s; + } + return s; + } + } + /** + * optional string bit_vectors = 12; + */ + public com.google.protobuf.ByteString + getBitVectorsBytes() { + 
java.lang.Object ref = bitVectors_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + bitVectors_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + private void initFields() { lastAnalyzed_ = 0L; columnType_ = ""; @@ -7518,6 +7581,7 @@ private void initFields() { binaryStats_ = org.apache.hadoop.hive.metastore.hbase.HbaseMetastoreProto.ColumnStats.StringStats.getDefaultInstance(); decimalStats_ = org.apache.hadoop.hive.metastore.hbase.HbaseMetastoreProto.ColumnStats.DecimalStats.getDefaultInstance(); columnName_ = ""; + bitVectors_ = ""; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -7574,6 +7638,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) if (((bitField0_ & 0x00000400) == 0x00000400)) { output.writeBytes(11, getColumnNameBytes()); } + if (((bitField0_ & 0x00000800) == 0x00000800)) { + output.writeBytes(12, getBitVectorsBytes()); + } getUnknownFields().writeTo(output); } @@ -7627,6 +7694,10 @@ public int getSerializedSize() { size += com.google.protobuf.CodedOutputStream .computeBytesSize(11, getColumnNameBytes()); } + if (((bitField0_ & 0x00000800) == 0x00000800)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(12, getBitVectorsBytes()); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -7795,6 +7866,8 @@ public Builder clear() { bitField0_ = (bitField0_ & ~0x00000200); columnName_ = ""; bitField0_ = (bitField0_ & ~0x00000400); + bitVectors_ = ""; + bitField0_ = (bitField0_ & ~0x00000800); return this; } @@ -7891,6 +7964,10 @@ public Builder clone() { to_bitField0_ |= 0x00000400; } result.columnName_ = columnName_; + if (((from_bitField0_ & 0x00000800) == 0x00000800)) { + to_bitField0_ |= 0x00000800; + } + result.bitVectors_ = bitVectors_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -7944,6 +8021,11 @@ public Builder mergeFrom(org.apache.hadoop.hive.metastore.hbase.HbaseMetastorePr columnName_ = other.columnName_; onChanged(); } + if (other.hasBitVectors()) { + bitField0_ |= 0x00000800; + bitVectors_ = other.bitVectors_; + onChanged(); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -8930,6 +9012,80 @@ public Builder setColumnNameBytes( return this; } + // optional string bit_vectors = 12; + private java.lang.Object bitVectors_ = ""; + /** + * optional string bit_vectors = 12; + */ + public boolean hasBitVectors() { + return ((bitField0_ & 0x00000800) == 0x00000800); + } + /** + * optional string bit_vectors = 12; + */ + public java.lang.String getBitVectors() { + java.lang.Object ref = bitVectors_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + bitVectors_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * optional string bit_vectors = 12; + */ + public com.google.protobuf.ByteString + getBitVectorsBytes() { + java.lang.Object ref = bitVectors_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + bitVectors_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * optional string bit_vectors = 12; + */ + public Builder setBitVectors( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 
0x00000800; + bitVectors_ = value; + onChanged(); + return this; + } + /** + * optional string bit_vectors = 12; + */ + public Builder clearBitVectors() { + bitField0_ = (bitField0_ & ~0x00000800); + bitVectors_ = getDefaultInstance().getBitVectors(); + onChanged(); + return this; + } + /** + * optional string bit_vectors = 12; + */ + public Builder setBitVectorsBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000800; + bitVectors_ = value; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.metastore.hbase.ColumnStats) } @@ -34506,7 +34662,7 @@ public Builder removeRange(int index) { "grStatsInvalidatorFilter.Entry\022\021\n\trun_ev" + "ery\030\002 \002(\003\022\034\n\024max_cache_entry_life\030\003 \002(\003\032" + "?\n\005Entry\022\017\n\007db_name\030\001 \002(\014\022\022\n\ntable_name\030" + - "\002 \002(\014\022\021\n\tpart_name\030\003 \002(\014\"\335\010\n\013ColumnStats" + + "\002 \002(\014\022\021\n\tpart_name\030\003 \002(\014\"\362\010\n\013ColumnStats" + "\022\025\n\rlast_analyzed\030\001 \001(\003\022\023\n\013column_type\030\002" + " \002(\t\022\021\n\tnum_nulls\030\003 \001(\003\022\033\n\023num_distinct_" + "values\030\004 \001(\003\022T\n\nbool_stats\030\005 \001(\0132@.org.a", @@ -34522,132 +34678,133 @@ public Builder removeRange(int index) { "ve.metastore.hbase.ColumnStats.StringSta", "ts\022W\n\rdecimal_stats\030\n \001(\0132@.org.apache.h" + "adoop.hive.metastore.hbase.ColumnStats.D" + - "ecimalStats\022\023\n\013column_name\030\013 \001(\t\0325\n\014Bool" + - "eanStats\022\021\n\tnum_trues\030\001 \001(\003\022\022\n\nnum_false" + - "s\030\002 \001(\003\0322\n\tLongStats\022\021\n\tlow_value\030\001 \001(\022\022" + - "\022\n\nhigh_value\030\002 \001(\022\0324\n\013DoubleStats\022\021\n\tlo" + - "w_value\030\001 \001(\001\022\022\n\nhigh_value\030\002 \001(\001\032=\n\013Str" + - "ingStats\022\026\n\016max_col_length\030\001 \001(\003\022\026\n\016avg_" + - "col_length\030\002 \001(\001\032\365\001\n\014DecimalStats\022[\n\tlow" + - "_value\030\001 \001(\0132H.org.apache.hadoop.hive.me", - "tastore.hbase.ColumnStats.DecimalStats.D" + - "ecimal\022\\\n\nhigh_value\030\002 \001(\0132H.org.apache." + - "hadoop.hive.metastore.hbase.ColumnStats." + - "DecimalStats.Decimal\032*\n\007Decimal\022\020\n\010unsca" + - "led\030\001 \002(\014\022\r\n\005scale\030\002 \002(\005\"\246\002\n\010Database\022\023\n" + - "\013description\030\001 \001(\t\022\013\n\003uri\030\002 \001(\t\022F\n\nparam" + - "eters\030\003 \001(\01322.org.apache.hadoop.hive.met" + - "astore.hbase.Parameters\022Q\n\nprivileges\030\004 " + - "\001(\0132=.org.apache.hadoop.hive.metastore.h" + - "base.PrincipalPrivilegeSet\022\022\n\nowner_name", - "\030\005 \001(\t\022I\n\nowner_type\030\006 \001(\01625.org.apache." 
+ - "hadoop.hive.metastore.hbase.PrincipalTyp" + - "e\"$\n\017DelegationToken\022\021\n\ttoken_str\030\001 \002(\t\"" + - ":\n\013FieldSchema\022\014\n\004name\030\001 \002(\t\022\014\n\004type\030\002 \002" + - "(\t\022\017\n\007comment\030\003 \001(\t\"\206\004\n\010Function\022\022\n\nclas" + - "s_name\030\001 \001(\t\022\022\n\nowner_name\030\002 \001(\t\022I\n\nowne" + - "r_type\030\003 \001(\01625.org.apache.hadoop.hive.me" + - "tastore.hbase.PrincipalType\022\023\n\013create_ti" + - "me\030\004 \001(\022\022T\n\rfunction_type\030\005 \001(\0162=.org.ap" + - "ache.hadoop.hive.metastore.hbase.Functio", - "n.FunctionType\022S\n\rresource_uris\030\006 \003(\0132<." + - "org.apache.hadoop.hive.metastore.hbase.F" + - "unction.ResourceUri\032\254\001\n\013ResourceUri\022`\n\rr" + - "esource_type\030\001 \002(\0162I.org.apache.hadoop.h" + + "ecimalStats\022\023\n\013column_name\030\013 \001(\t\022\023\n\013bit_" + + "vectors\030\014 \001(\t\0325\n\014BooleanStats\022\021\n\tnum_tru" + + "es\030\001 \001(\003\022\022\n\nnum_falses\030\002 \001(\003\0322\n\tLongStat" + + "s\022\021\n\tlow_value\030\001 \001(\022\022\022\n\nhigh_value\030\002 \001(\022" + + "\0324\n\013DoubleStats\022\021\n\tlow_value\030\001 \001(\001\022\022\n\nhi" + + "gh_value\030\002 \001(\001\032=\n\013StringStats\022\026\n\016max_col" + + "_length\030\001 \001(\003\022\026\n\016avg_col_length\030\002 \001(\001\032\365\001" + + "\n\014DecimalStats\022[\n\tlow_value\030\001 \001(\0132H.org.", + "apache.hadoop.hive.metastore.hbase.Colum" + + "nStats.DecimalStats.Decimal\022\\\n\nhigh_valu" + + "e\030\002 \001(\0132H.org.apache.hadoop.hive.metasto" + + "re.hbase.ColumnStats.DecimalStats.Decima" + + "l\032*\n\007Decimal\022\020\n\010unscaled\030\001 \002(\014\022\r\n\005scale\030" + + "\002 \002(\005\"\246\002\n\010Database\022\023\n\013description\030\001 \001(\t\022" + + "\013\n\003uri\030\002 \001(\t\022F\n\nparameters\030\003 \001(\01322.org.a" + + "pache.hadoop.hive.metastore.hbase.Parame" + + "ters\022Q\n\nprivileges\030\004 \001(\0132=.org.apache.ha" + + "doop.hive.metastore.hbase.PrincipalPrivi", + "legeSet\022\022\n\nowner_name\030\005 \001(\t\022I\n\nowner_typ" + + "e\030\006 \001(\01625.org.apache.hadoop.hive.metasto" + + "re.hbase.PrincipalType\"$\n\017DelegationToke" + + "n\022\021\n\ttoken_str\030\001 \002(\t\":\n\013FieldSchema\022\014\n\004n" + + "ame\030\001 \002(\t\022\014\n\004type\030\002 \002(\t\022\017\n\007comment\030\003 \001(\t" + + "\"\206\004\n\010Function\022\022\n\nclass_name\030\001 \001(\t\022\022\n\nown" + + "er_name\030\002 \001(\t\022I\n\nowner_type\030\003 \001(\01625.org." 
+ + "apache.hadoop.hive.metastore.hbase.Princ" + + "ipalType\022\023\n\013create_time\030\004 \001(\022\022T\n\rfunctio" + + "n_type\030\005 \001(\0162=.org.apache.hadoop.hive.me", + "tastore.hbase.Function.FunctionType\022S\n\rr" + + "esource_uris\030\006 \003(\0132<.org.apache.hadoop.h" + "ive.metastore.hbase.Function.ResourceUri" + - ".ResourceType\022\013\n\003uri\030\002 \002(\t\".\n\014ResourceTy" + - "pe\022\007\n\003JAR\020\001\022\010\n\004FILE\020\002\022\013\n\007ARCHIVE\020\003\"\030\n\014Fu" + - "nctionType\022\010\n\004JAVA\020\001\"\037\n\tMasterKey\022\022\n\nmas" + - "ter_key\030\001 \002(\t\",\n\016ParameterEntry\022\013\n\003key\030\001" + - " \002(\t\022\r\n\005value\030\002 \002(\t\"W\n\nParameters\022I\n\tpar", - "ameter\030\001 \003(\01326.org.apache.hadoop.hive.me" + - "tastore.hbase.ParameterEntry\"\360\001\n\tPartiti" + - "on\022\023\n\013create_time\030\001 \001(\003\022\030\n\020last_access_t" + - "ime\030\002 \001(\003\022\020\n\010location\030\003 \001(\t\022I\n\rsd_parame" + - "ters\030\004 \001(\01322.org.apache.hadoop.hive.meta" + - "store.hbase.Parameters\022\017\n\007sd_hash\030\005 \002(\014\022" + - "F\n\nparameters\030\006 \001(\01322.org.apache.hadoop." + - "hive.metastore.hbase.Parameters\"\204\001\n\032Prin" + - "cipalPrivilegeSetEntry\022\026\n\016principal_name" + - "\030\001 \002(\t\022N\n\nprivileges\030\002 \003(\0132:.org.apache.", - "hadoop.hive.metastore.hbase.PrivilegeGra" + - "ntInfo\"\275\001\n\025PrincipalPrivilegeSet\022Q\n\005user" + - "s\030\001 \003(\0132B.org.apache.hadoop.hive.metasto" + - "re.hbase.PrincipalPrivilegeSetEntry\022Q\n\005r" + - "oles\030\002 \003(\0132B.org.apache.hadoop.hive.meta" + - "store.hbase.PrincipalPrivilegeSetEntry\"\260" + - "\001\n\022PrivilegeGrantInfo\022\021\n\tprivilege\030\001 \001(\t" + - "\022\023\n\013create_time\030\002 \001(\003\022\017\n\007grantor\030\003 \001(\t\022K" + - "\n\014grantor_type\030\004 \001(\01625.org.apache.hadoop" + - ".hive.metastore.hbase.PrincipalType\022\024\n\014g", - "rant_option\030\005 \001(\010\"\374\001\n\rRoleGrantInfo\022\026\n\016p" + - "rincipal_name\030\001 \002(\t\022M\n\016principal_type\030\002 " + - "\002(\01625.org.apache.hadoop.hive.metastore.h" + - "base.PrincipalType\022\020\n\010add_time\030\003 \001(\003\022\017\n\007" + - "grantor\030\004 \001(\t\022K\n\014grantor_type\030\005 \001(\01625.or" + - "g.apache.hadoop.hive.metastore.hbase.Pri" + - "ncipalType\022\024\n\014grant_option\030\006 \001(\010\"^\n\021Role" + - "GrantInfoList\022I\n\ngrant_info\030\001 \003(\01325.org." 
+ - "apache.hadoop.hive.metastore.hbase.RoleG" + - "rantInfo\"\030\n\010RoleList\022\014\n\004role\030\001 \003(\t\"/\n\004Ro", - "le\022\023\n\013create_time\030\001 \001(\003\022\022\n\nowner_name\030\002 " + - "\001(\t\"\254\010\n\021StorageDescriptor\022A\n\004cols\030\001 \003(\0132" + - "3.org.apache.hadoop.hive.metastore.hbase" + - ".FieldSchema\022\024\n\014input_format\030\002 \001(\t\022\025\n\rou" + - "tput_format\030\003 \001(\t\022\025\n\ris_compressed\030\004 \001(\010" + - "\022\023\n\013num_buckets\030\005 \001(\021\022W\n\nserde_info\030\006 \001(" + - "\0132C.org.apache.hadoop.hive.metastore.hba" + - "se.StorageDescriptor.SerDeInfo\022\023\n\013bucket" + - "_cols\030\007 \003(\t\022R\n\tsort_cols\030\010 \003(\0132?.org.apa" + - "che.hadoop.hive.metastore.hbase.StorageD", - "escriptor.Order\022Y\n\013skewed_info\030\t \001(\0132D.o" + - "rg.apache.hadoop.hive.metastore.hbase.St" + - "orageDescriptor.SkewedInfo\022!\n\031stored_as_" + - "sub_directories\030\n \001(\010\032.\n\005Order\022\023\n\013column" + - "_name\030\001 \002(\t\022\020\n\005order\030\002 \001(\021:\0011\032|\n\tSerDeIn" + - "fo\022\014\n\004name\030\001 \001(\t\022\031\n\021serialization_lib\030\002 " + - "\001(\t\022F\n\nparameters\030\003 \001(\01322.org.apache.had" + - "oop.hive.metastore.hbase.Parameters\032\214\003\n\n" + - "SkewedInfo\022\030\n\020skewed_col_names\030\001 \003(\t\022r\n\021" + - "skewed_col_values\030\002 \003(\0132W.org.apache.had", - "oop.hive.metastore.hbase.StorageDescript" + - "or.SkewedInfo.SkewedColValueList\022\206\001\n\036ske" + - "wed_col_value_location_maps\030\003 \003(\0132^.org." + - "apache.hadoop.hive.metastore.hbase.Stora" + - "geDescriptor.SkewedInfo.SkewedColValueLo" + - "cationMap\032.\n\022SkewedColValueList\022\030\n\020skewe" + - "d_col_value\030\001 \003(\t\0327\n\031SkewedColValueLocat" + - "ionMap\022\013\n\003key\030\001 \003(\t\022\r\n\005value\030\002 \002(\t\"\220\004\n\005T" + - "able\022\r\n\005owner\030\001 \001(\t\022\023\n\013create_time\030\002 \001(\003" + - "\022\030\n\020last_access_time\030\003 \001(\003\022\021\n\tretention\030", - "\004 \001(\003\022\020\n\010location\030\005 \001(\t\022I\n\rsd_parameters" + - "\030\006 \001(\01322.org.apache.hadoop.hive.metastor" + - "e.hbase.Parameters\022\017\n\007sd_hash\030\007 \002(\014\022K\n\016p" + - "artition_keys\030\010 \003(\01323.org.apache.hadoop." 
+ - "hive.metastore.hbase.FieldSchema\022F\n\npara" + - "meters\030\t \001(\01322.org.apache.hadoop.hive.me" + - "tastore.hbase.Parameters\022\032\n\022view_origina" + - "l_text\030\n \001(\t\022\032\n\022view_expanded_text\030\013 \001(\t" + - "\022\022\n\ntable_type\030\014 \001(\t\022Q\n\nprivileges\030\r \001(\013" + - "2=.org.apache.hadoop.hive.metastore.hbas", - "e.PrincipalPrivilegeSet\022\024\n\014is_temporary\030" + - "\016 \001(\010\"\353\004\n\026PartitionKeyComparator\022\r\n\005name" + - "s\030\001 \002(\t\022\r\n\005types\030\002 \002(\t\022S\n\002op\030\003 \003(\0132G.org" + - ".apache.hadoop.hive.metastore.hbase.Part" + - "itionKeyComparator.Operator\022S\n\005range\030\004 \003" + - "(\0132D.org.apache.hadoop.hive.metastore.hb" + - "ase.PartitionKeyComparator.Range\032(\n\004Mark" + - "\022\r\n\005value\030\001 \002(\t\022\021\n\tinclusive\030\002 \002(\010\032\272\001\n\005R" + - "ange\022\013\n\003key\030\001 \002(\t\022R\n\005start\030\002 \001(\0132C.org.a" + - "pache.hadoop.hive.metastore.hbase.Partit", - "ionKeyComparator.Mark\022P\n\003end\030\003 \001(\0132C.org" + - ".apache.hadoop.hive.metastore.hbase.Part" + - "itionKeyComparator.Mark\032\241\001\n\010Operator\022Z\n\004" + - "type\030\001 \002(\0162L.org.apache.hadoop.hive.meta" + - "store.hbase.PartitionKeyComparator.Opera" + - "tor.Type\022\013\n\003key\030\002 \002(\t\022\013\n\003val\030\003 \002(\t\"\037\n\004Ty" + - "pe\022\010\n\004LIKE\020\000\022\r\n\tNOTEQUALS\020\001*#\n\rPrincipal" + - "Type\022\010\n\004USER\020\000\022\010\n\004ROLE\020\001" + "\032\254\001\n\013ResourceUri\022`\n\rresource_type\030\001 \002(\0162" + + "I.org.apache.hadoop.hive.metastore.hbase" + + ".Function.ResourceUri.ResourceType\022\013\n\003ur" + + "i\030\002 \002(\t\".\n\014ResourceType\022\007\n\003JAR\020\001\022\010\n\004FILE" + + "\020\002\022\013\n\007ARCHIVE\020\003\"\030\n\014FunctionType\022\010\n\004JAVA\020" + + "\001\"\037\n\tMasterKey\022\022\n\nmaster_key\030\001 \002(\t\",\n\016Pa" + + "rameterEntry\022\013\n\003key\030\001 \002(\t\022\r\n\005value\030\002 \002(\t", + "\"W\n\nParameters\022I\n\tparameter\030\001 \003(\01326.org." 
+ + "apache.hadoop.hive.metastore.hbase.Param" + + "eterEntry\"\360\001\n\tPartition\022\023\n\013create_time\030\001" + + " \001(\003\022\030\n\020last_access_time\030\002 \001(\003\022\020\n\010locati" + + "on\030\003 \001(\t\022I\n\rsd_parameters\030\004 \001(\01322.org.ap" + + "ache.hadoop.hive.metastore.hbase.Paramet" + + "ers\022\017\n\007sd_hash\030\005 \002(\014\022F\n\nparameters\030\006 \001(\013" + + "22.org.apache.hadoop.hive.metastore.hbas" + + "e.Parameters\"\204\001\n\032PrincipalPrivilegeSetEn" + + "try\022\026\n\016principal_name\030\001 \002(\t\022N\n\nprivilege", + "s\030\002 \003(\0132:.org.apache.hadoop.hive.metasto" + + "re.hbase.PrivilegeGrantInfo\"\275\001\n\025Principa" + + "lPrivilegeSet\022Q\n\005users\030\001 \003(\0132B.org.apach" + + "e.hadoop.hive.metastore.hbase.PrincipalP" + + "rivilegeSetEntry\022Q\n\005roles\030\002 \003(\0132B.org.ap" + + "ache.hadoop.hive.metastore.hbase.Princip" + + "alPrivilegeSetEntry\"\260\001\n\022PrivilegeGrantIn" + + "fo\022\021\n\tprivilege\030\001 \001(\t\022\023\n\013create_time\030\002 \001" + + "(\003\022\017\n\007grantor\030\003 \001(\t\022K\n\014grantor_type\030\004 \001(" + + "\01625.org.apache.hadoop.hive.metastore.hba", + "se.PrincipalType\022\024\n\014grant_option\030\005 \001(\010\"\374" + + "\001\n\rRoleGrantInfo\022\026\n\016principal_name\030\001 \002(\t" + + "\022M\n\016principal_type\030\002 \002(\01625.org.apache.ha" + + "doop.hive.metastore.hbase.PrincipalType\022" + + "\020\n\010add_time\030\003 \001(\003\022\017\n\007grantor\030\004 \001(\t\022K\n\014gr" + + "antor_type\030\005 \001(\01625.org.apache.hadoop.hiv" + + "e.metastore.hbase.PrincipalType\022\024\n\014grant" + + "_option\030\006 \001(\010\"^\n\021RoleGrantInfoList\022I\n\ngr" + + "ant_info\030\001 \003(\01325.org.apache.hadoop.hive." + + "metastore.hbase.RoleGrantInfo\"\030\n\010RoleLis", + "t\022\014\n\004role\030\001 \003(\t\"/\n\004Role\022\023\n\013create_time\030\001" + + " \001(\003\022\022\n\nowner_name\030\002 \001(\t\"\254\010\n\021StorageDesc" + + "riptor\022A\n\004cols\030\001 \003(\01323.org.apache.hadoop" + + ".hive.metastore.hbase.FieldSchema\022\024\n\014inp" + + "ut_format\030\002 \001(\t\022\025\n\routput_format\030\003 \001(\t\022\025" + + "\n\ris_compressed\030\004 \001(\010\022\023\n\013num_buckets\030\005 \001" + + "(\021\022W\n\nserde_info\030\006 \001(\0132C.org.apache.hado" + + "op.hive.metastore.hbase.StorageDescripto" + + "r.SerDeInfo\022\023\n\013bucket_cols\030\007 \003(\t\022R\n\tsort" + + "_cols\030\010 \003(\0132?.org.apache.hadoop.hive.met", + "astore.hbase.StorageDescriptor.Order\022Y\n\013" + + "skewed_info\030\t \001(\0132D.org.apache.hadoop.hi" + + "ve.metastore.hbase.StorageDescriptor.Ske" + + "wedInfo\022!\n\031stored_as_sub_directories\030\n \001" + + "(\010\032.\n\005Order\022\023\n\013column_name\030\001 \002(\t\022\020\n\005orde" + + "r\030\002 \001(\021:\0011\032|\n\tSerDeInfo\022\014\n\004name\030\001 \001(\t\022\031\n" + + "\021serialization_lib\030\002 \001(\t\022F\n\nparameters\030\003" + + " \001(\01322.org.apache.hadoop.hive.metastore." + + "hbase.Parameters\032\214\003\n\nSkewedInfo\022\030\n\020skewe" + + "d_col_names\030\001 \003(\t\022r\n\021skewed_col_values\030\002", + " \003(\0132W.org.apache.hadoop.hive.metastore." + + "hbase.StorageDescriptor.SkewedInfo.Skewe" + + "dColValueList\022\206\001\n\036skewed_col_value_locat" + + "ion_maps\030\003 \003(\0132^.org.apache.hadoop.hive." 
+ + "metastore.hbase.StorageDescriptor.Skewed" + + "Info.SkewedColValueLocationMap\032.\n\022Skewed" + + "ColValueList\022\030\n\020skewed_col_value\030\001 \003(\t\0327" + + "\n\031SkewedColValueLocationMap\022\013\n\003key\030\001 \003(\t" + + "\022\r\n\005value\030\002 \002(\t\"\220\004\n\005Table\022\r\n\005owner\030\001 \001(\t" + + "\022\023\n\013create_time\030\002 \001(\003\022\030\n\020last_access_tim", + "e\030\003 \001(\003\022\021\n\tretention\030\004 \001(\003\022\020\n\010location\030\005" + + " \001(\t\022I\n\rsd_parameters\030\006 \001(\01322.org.apache" + + ".hadoop.hive.metastore.hbase.Parameters\022" + + "\017\n\007sd_hash\030\007 \002(\014\022K\n\016partition_keys\030\010 \003(\013" + + "23.org.apache.hadoop.hive.metastore.hbas" + + "e.FieldSchema\022F\n\nparameters\030\t \001(\01322.org." + + "apache.hadoop.hive.metastore.hbase.Param" + + "eters\022\032\n\022view_original_text\030\n \001(\t\022\032\n\022vie" + + "w_expanded_text\030\013 \001(\t\022\022\n\ntable_type\030\014 \001(" + + "\t\022Q\n\nprivileges\030\r \001(\0132=.org.apache.hadoo", + "p.hive.metastore.hbase.PrincipalPrivileg" + + "eSet\022\024\n\014is_temporary\030\016 \001(\010\"\353\004\n\026Partition" + + "KeyComparator\022\r\n\005names\030\001 \002(\t\022\r\n\005types\030\002 " + + "\002(\t\022S\n\002op\030\003 \003(\0132G.org.apache.hadoop.hive" + + ".metastore.hbase.PartitionKeyComparator." + + "Operator\022S\n\005range\030\004 \003(\0132D.org.apache.had" + + "oop.hive.metastore.hbase.PartitionKeyCom" + + "parator.Range\032(\n\004Mark\022\r\n\005value\030\001 \002(\t\022\021\n\t" + + "inclusive\030\002 \002(\010\032\272\001\n\005Range\022\013\n\003key\030\001 \002(\t\022R" + + "\n\005start\030\002 \001(\0132C.org.apache.hadoop.hive.m", + "etastore.hbase.PartitionKeyComparator.Ma" + + "rk\022P\n\003end\030\003 \001(\0132C.org.apache.hadoop.hive" + + ".metastore.hbase.PartitionKeyComparator." 
+ + "Mark\032\241\001\n\010Operator\022Z\n\004type\030\001 \002(\0162L.org.ap" + + "ache.hadoop.hive.metastore.hbase.Partiti" + + "onKeyComparator.Operator.Type\022\013\n\003key\030\002 \002" + + "(\t\022\013\n\003val\030\003 \002(\t\"\037\n\004Type\022\010\n\004LIKE\020\000\022\r\n\tNOT" + + "EQUALS\020\001*#\n\rPrincipalType\022\010\n\004USER\020\000\022\010\n\004R" + + "OLE\020\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -34689,7 +34846,7 @@ public Builder removeRange(int index) { internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_descriptor, - new java.lang.String[] { "LastAnalyzed", "ColumnType", "NumNulls", "NumDistinctValues", "BoolStats", "LongStats", "DoubleStats", "StringStats", "BinaryStats", "DecimalStats", "ColumnName", }); + new java.lang.String[] { "LastAnalyzed", "ColumnType", "NumNulls", "NumDistinctValues", "BoolStats", "LongStats", "DoubleStats", "StringStats", "BinaryStats", "DecimalStats", "ColumnName", "BitVectors", }); internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_BooleanStats_descriptor = internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_descriptor.getNestedTypes().get(0); internal_static_org_apache_hadoop_hive_metastore_hbase_ColumnStats_BooleanStats_fieldAccessorTable = new diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp index 0203b06..81577b6 100644 --- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp +++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp @@ -5425,6 +5425,11 @@ void BooleanColumnStatsData::__set_numNulls(const int64_t val) { this->numNulls = val; } +void BooleanColumnStatsData::__set_bitVectors(const std::string& val) { + this->bitVectors = val; +__isset.bitVectors = true; +} + uint32_t BooleanColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -5473,6 +5478,14 @@ uint32_t BooleanColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr xfer += iprot->skip(ftype); } break; + case 4: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->bitVectors); + this->__isset.bitVectors = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -5508,6 +5521,11 @@ uint32_t BooleanColumnStatsData::write(::apache::thrift::protocol::TProtocol* op xfer += oprot->writeI64(this->numNulls); xfer += oprot->writeFieldEnd(); + if (this->__isset.bitVectors) { + xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 4); + xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -5518,17 +5536,23 @@ void swap(BooleanColumnStatsData &a, BooleanColumnStatsData &b) { swap(a.numTrues, b.numTrues); swap(a.numFalses, b.numFalses); swap(a.numNulls, b.numNulls); + swap(a.bitVectors, b.bitVectors); + swap(a.__isset, b.__isset); } BooleanColumnStatsData::BooleanColumnStatsData(const BooleanColumnStatsData& other279) { numTrues = other279.numTrues; numFalses = other279.numFalses; numNulls = other279.numNulls; + bitVectors = 
other279.bitVectors; + __isset = other279.__isset; } BooleanColumnStatsData& BooleanColumnStatsData::operator=(const BooleanColumnStatsData& other280) { numTrues = other280.numTrues; numFalses = other280.numFalses; numNulls = other280.numNulls; + bitVectors = other280.bitVectors; + __isset = other280.__isset; return *this; } void BooleanColumnStatsData::printTo(std::ostream& out) const { @@ -5537,6 +5561,7 @@ void BooleanColumnStatsData::printTo(std::ostream& out) const { out << "numTrues=" << to_string(numTrues); out << ", " << "numFalses=" << to_string(numFalses); out << ", " << "numNulls=" << to_string(numNulls); + out << ", " << "bitVectors="; (__isset.bitVectors ? (out << to_string(bitVectors)) : (out << "")); out << ")"; } @@ -5563,6 +5588,11 @@ void DoubleColumnStatsData::__set_numDVs(const int64_t val) { this->numDVs = val; } +void DoubleColumnStatsData::__set_bitVectors(const std::string& val) { + this->bitVectors = val; +__isset.bitVectors = true; +} + uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -5618,6 +5648,14 @@ uint32_t DoubleColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro xfer += iprot->skip(ftype); } break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->bitVectors); + this->__isset.bitVectors = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -5657,6 +5695,11 @@ uint32_t DoubleColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr xfer += oprot->writeI64(this->numDVs); xfer += oprot->writeFieldEnd(); + if (this->__isset.bitVectors) { + xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5); + xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -5668,6 +5711,7 @@ void swap(DoubleColumnStatsData &a, DoubleColumnStatsData &b) { swap(a.highValue, b.highValue); swap(a.numNulls, b.numNulls); swap(a.numDVs, b.numDVs); + swap(a.bitVectors, b.bitVectors); swap(a.__isset, b.__isset); } @@ -5676,6 +5720,7 @@ DoubleColumnStatsData::DoubleColumnStatsData(const DoubleColumnStatsData& other2 highValue = other281.highValue; numNulls = other281.numNulls; numDVs = other281.numDVs; + bitVectors = other281.bitVectors; __isset = other281.__isset; } DoubleColumnStatsData& DoubleColumnStatsData::operator=(const DoubleColumnStatsData& other282) { @@ -5683,6 +5728,7 @@ DoubleColumnStatsData& DoubleColumnStatsData::operator=(const DoubleColumnStatsD highValue = other282.highValue; numNulls = other282.numNulls; numDVs = other282.numDVs; + bitVectors = other282.bitVectors; __isset = other282.__isset; return *this; } @@ -5693,6 +5739,7 @@ void DoubleColumnStatsData::printTo(std::ostream& out) const { out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "")); out << ", " << "numNulls=" << to_string(numNulls); out << ", " << "numDVs=" << to_string(numDVs); + out << ", " << "bitVectors="; (__isset.bitVectors ? 
(out << to_string(bitVectors)) : (out << "")); out << ")"; } @@ -5719,6 +5766,11 @@ void LongColumnStatsData::__set_numDVs(const int64_t val) { this->numDVs = val; } +void LongColumnStatsData::__set_bitVectors(const std::string& val) { + this->bitVectors = val; +__isset.bitVectors = true; +} + uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -5774,6 +5826,14 @@ uint32_t LongColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) xfer += iprot->skip(ftype); } break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->bitVectors); + this->__isset.bitVectors = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -5813,6 +5873,11 @@ uint32_t LongColumnStatsData::write(::apache::thrift::protocol::TProtocol* oprot xfer += oprot->writeI64(this->numDVs); xfer += oprot->writeFieldEnd(); + if (this->__isset.bitVectors) { + xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5); + xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -5824,6 +5889,7 @@ void swap(LongColumnStatsData &a, LongColumnStatsData &b) { swap(a.highValue, b.highValue); swap(a.numNulls, b.numNulls); swap(a.numDVs, b.numDVs); + swap(a.bitVectors, b.bitVectors); swap(a.__isset, b.__isset); } @@ -5832,6 +5898,7 @@ LongColumnStatsData::LongColumnStatsData(const LongColumnStatsData& other283) { highValue = other283.highValue; numNulls = other283.numNulls; numDVs = other283.numDVs; + bitVectors = other283.bitVectors; __isset = other283.__isset; } LongColumnStatsData& LongColumnStatsData::operator=(const LongColumnStatsData& other284) { @@ -5839,6 +5906,7 @@ LongColumnStatsData& LongColumnStatsData::operator=(const LongColumnStatsData& o highValue = other284.highValue; numNulls = other284.numNulls; numDVs = other284.numDVs; + bitVectors = other284.bitVectors; __isset = other284.__isset; return *this; } @@ -5849,6 +5917,7 @@ void LongColumnStatsData::printTo(std::ostream& out) const { out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "")); out << ", " << "numNulls=" << to_string(numNulls); out << ", " << "numDVs=" << to_string(numDVs); + out << ", " << "bitVectors="; (__isset.bitVectors ? 
(out << to_string(bitVectors)) : (out << "")); out << ")"; } @@ -5873,6 +5942,11 @@ void StringColumnStatsData::__set_numDVs(const int64_t val) { this->numDVs = val; } +void StringColumnStatsData::__set_bitVectors(const std::string& val) { + this->bitVectors = val; +__isset.bitVectors = true; +} + uint32_t StringColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -5930,6 +6004,14 @@ uint32_t StringColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro xfer += iprot->skip(ftype); } break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->bitVectors); + this->__isset.bitVectors = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -5971,6 +6053,11 @@ uint32_t StringColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr xfer += oprot->writeI64(this->numDVs); xfer += oprot->writeFieldEnd(); + if (this->__isset.bitVectors) { + xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5); + xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -5982,6 +6069,8 @@ void swap(StringColumnStatsData &a, StringColumnStatsData &b) { swap(a.avgColLen, b.avgColLen); swap(a.numNulls, b.numNulls); swap(a.numDVs, b.numDVs); + swap(a.bitVectors, b.bitVectors); + swap(a.__isset, b.__isset); } StringColumnStatsData::StringColumnStatsData(const StringColumnStatsData& other285) { @@ -5989,12 +6078,16 @@ StringColumnStatsData::StringColumnStatsData(const StringColumnStatsData& other2 avgColLen = other285.avgColLen; numNulls = other285.numNulls; numDVs = other285.numDVs; + bitVectors = other285.bitVectors; + __isset = other285.__isset; } StringColumnStatsData& StringColumnStatsData::operator=(const StringColumnStatsData& other286) { maxColLen = other286.maxColLen; avgColLen = other286.avgColLen; numNulls = other286.numNulls; numDVs = other286.numDVs; + bitVectors = other286.bitVectors; + __isset = other286.__isset; return *this; } void StringColumnStatsData::printTo(std::ostream& out) const { @@ -6004,6 +6097,7 @@ void StringColumnStatsData::printTo(std::ostream& out) const { out << ", " << "avgColLen=" << to_string(avgColLen); out << ", " << "numNulls=" << to_string(numNulls); out << ", " << "numDVs=" << to_string(numDVs); + out << ", " << "bitVectors="; (__isset.bitVectors ? 
(out << to_string(bitVectors)) : (out << "")); out << ")"; } @@ -6024,6 +6118,11 @@ void BinaryColumnStatsData::__set_numNulls(const int64_t val) { this->numNulls = val; } +void BinaryColumnStatsData::__set_bitVectors(const std::string& val) { + this->bitVectors = val; +__isset.bitVectors = true; +} + uint32_t BinaryColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -6072,6 +6171,14 @@ uint32_t BinaryColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipro xfer += iprot->skip(ftype); } break; + case 4: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->bitVectors); + this->__isset.bitVectors = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -6107,6 +6214,11 @@ uint32_t BinaryColumnStatsData::write(::apache::thrift::protocol::TProtocol* opr xfer += oprot->writeI64(this->numNulls); xfer += oprot->writeFieldEnd(); + if (this->__isset.bitVectors) { + xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 4); + xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -6117,17 +6229,23 @@ void swap(BinaryColumnStatsData &a, BinaryColumnStatsData &b) { swap(a.maxColLen, b.maxColLen); swap(a.avgColLen, b.avgColLen); swap(a.numNulls, b.numNulls); + swap(a.bitVectors, b.bitVectors); + swap(a.__isset, b.__isset); } BinaryColumnStatsData::BinaryColumnStatsData(const BinaryColumnStatsData& other287) { maxColLen = other287.maxColLen; avgColLen = other287.avgColLen; numNulls = other287.numNulls; + bitVectors = other287.bitVectors; + __isset = other287.__isset; } BinaryColumnStatsData& BinaryColumnStatsData::operator=(const BinaryColumnStatsData& other288) { maxColLen = other288.maxColLen; avgColLen = other288.avgColLen; numNulls = other288.numNulls; + bitVectors = other288.bitVectors; + __isset = other288.__isset; return *this; } void BinaryColumnStatsData::printTo(std::ostream& out) const { @@ -6136,6 +6254,7 @@ void BinaryColumnStatsData::printTo(std::ostream& out) const { out << "maxColLen=" << to_string(maxColLen); out << ", " << "avgColLen=" << to_string(avgColLen); out << ", " << "numNulls=" << to_string(numNulls); + out << ", " << "bitVectors="; (__isset.bitVectors ? 
(out << to_string(bitVectors)) : (out << "")); out << ")"; } @@ -6271,6 +6390,11 @@ void DecimalColumnStatsData::__set_numDVs(const int64_t val) { this->numDVs = val; } +void DecimalColumnStatsData::__set_bitVectors(const std::string& val) { + this->bitVectors = val; +__isset.bitVectors = true; +} + uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -6326,6 +6450,14 @@ uint32_t DecimalColumnStatsData::read(::apache::thrift::protocol::TProtocol* ipr xfer += iprot->skip(ftype); } break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->bitVectors); + this->__isset.bitVectors = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -6365,6 +6497,11 @@ uint32_t DecimalColumnStatsData::write(::apache::thrift::protocol::TProtocol* op xfer += oprot->writeI64(this->numDVs); xfer += oprot->writeFieldEnd(); + if (this->__isset.bitVectors) { + xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5); + xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -6376,6 +6513,7 @@ void swap(DecimalColumnStatsData &a, DecimalColumnStatsData &b) { swap(a.highValue, b.highValue); swap(a.numNulls, b.numNulls); swap(a.numDVs, b.numDVs); + swap(a.bitVectors, b.bitVectors); swap(a.__isset, b.__isset); } @@ -6384,6 +6522,7 @@ DecimalColumnStatsData::DecimalColumnStatsData(const DecimalColumnStatsData& oth highValue = other291.highValue; numNulls = other291.numNulls; numDVs = other291.numDVs; + bitVectors = other291.bitVectors; __isset = other291.__isset; } DecimalColumnStatsData& DecimalColumnStatsData::operator=(const DecimalColumnStatsData& other292) { @@ -6391,6 +6530,7 @@ DecimalColumnStatsData& DecimalColumnStatsData::operator=(const DecimalColumnSta highValue = other292.highValue; numNulls = other292.numNulls; numDVs = other292.numDVs; + bitVectors = other292.bitVectors; __isset = other292.__isset; return *this; } @@ -6401,6 +6541,7 @@ void DecimalColumnStatsData::printTo(std::ostream& out) const { out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "")); out << ", " << "numNulls=" << to_string(numNulls); out << ", " << "numDVs=" << to_string(numDVs); + out << ", " << "bitVectors="; (__isset.bitVectors ? 
(out << to_string(bitVectors)) : (out << "")); out << ")"; } @@ -6513,6 +6654,11 @@ void DateColumnStatsData::__set_numDVs(const int64_t val) { this->numDVs = val; } +void DateColumnStatsData::__set_bitVectors(const std::string& val) { + this->bitVectors = val; +__isset.bitVectors = true; +} + uint32_t DateColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -6568,6 +6714,14 @@ uint32_t DateColumnStatsData::read(::apache::thrift::protocol::TProtocol* iprot) xfer += iprot->skip(ftype); } break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->bitVectors); + this->__isset.bitVectors = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -6607,6 +6761,11 @@ uint32_t DateColumnStatsData::write(::apache::thrift::protocol::TProtocol* oprot xfer += oprot->writeI64(this->numDVs); xfer += oprot->writeFieldEnd(); + if (this->__isset.bitVectors) { + xfer += oprot->writeFieldBegin("bitVectors", ::apache::thrift::protocol::T_STRING, 5); + xfer += oprot->writeString(this->bitVectors); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -6618,6 +6777,7 @@ void swap(DateColumnStatsData &a, DateColumnStatsData &b) { swap(a.highValue, b.highValue); swap(a.numNulls, b.numNulls); swap(a.numDVs, b.numDVs); + swap(a.bitVectors, b.bitVectors); swap(a.__isset, b.__isset); } @@ -6626,6 +6786,7 @@ DateColumnStatsData::DateColumnStatsData(const DateColumnStatsData& other295) { highValue = other295.highValue; numNulls = other295.numNulls; numDVs = other295.numDVs; + bitVectors = other295.bitVectors; __isset = other295.__isset; } DateColumnStatsData& DateColumnStatsData::operator=(const DateColumnStatsData& other296) { @@ -6633,6 +6794,7 @@ DateColumnStatsData& DateColumnStatsData::operator=(const DateColumnStatsData& o highValue = other296.highValue; numNulls = other296.numNulls; numDVs = other296.numDVs; + bitVectors = other296.bitVectors; __isset = other296.__isset; return *this; } @@ -6643,6 +6805,7 @@ void DateColumnStatsData::printTo(std::ostream& out) const { out << ", " << "highValue="; (__isset.highValue ? (out << to_string(highValue)) : (out << "")); out << ", " << "numNulls=" << to_string(numNulls); out << ", " << "numDVs=" << to_string(numDVs); + out << ", " << "bitVectors="; (__isset.bitVectors ? 
(out << to_string(bitVectors)) : (out << "")); out << ")"; } diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h index ce1d7da..c501ac0 100644 --- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h +++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h @@ -2364,19 +2364,26 @@ inline std::ostream& operator<<(std::ostream& out, const Index& obj) return out; } +typedef struct _BooleanColumnStatsData__isset { + _BooleanColumnStatsData__isset() : bitVectors(false) {} + bool bitVectors :1; +} _BooleanColumnStatsData__isset; class BooleanColumnStatsData { public: BooleanColumnStatsData(const BooleanColumnStatsData&); BooleanColumnStatsData& operator=(const BooleanColumnStatsData&); - BooleanColumnStatsData() : numTrues(0), numFalses(0), numNulls(0) { + BooleanColumnStatsData() : numTrues(0), numFalses(0), numNulls(0), bitVectors() { } virtual ~BooleanColumnStatsData() throw(); int64_t numTrues; int64_t numFalses; int64_t numNulls; + std::string bitVectors; + + _BooleanColumnStatsData__isset __isset; void __set_numTrues(const int64_t val); @@ -2384,6 +2391,8 @@ class BooleanColumnStatsData { void __set_numNulls(const int64_t val); + void __set_bitVectors(const std::string& val); + bool operator == (const BooleanColumnStatsData & rhs) const { if (!(numTrues == rhs.numTrues)) @@ -2392,6 +2401,10 @@ class BooleanColumnStatsData { return false; if (!(numNulls == rhs.numNulls)) return false; + if (__isset.bitVectors != rhs.__isset.bitVectors) + return false; + else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors)) + return false; return true; } bool operator != (const BooleanColumnStatsData &rhs) const { @@ -2415,9 +2428,10 @@ inline std::ostream& operator<<(std::ostream& out, const BooleanColumnStatsData& } typedef struct _DoubleColumnStatsData__isset { - _DoubleColumnStatsData__isset() : lowValue(false), highValue(false) {} + _DoubleColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {} bool lowValue :1; bool highValue :1; + bool bitVectors :1; } _DoubleColumnStatsData__isset; class DoubleColumnStatsData { @@ -2425,7 +2439,7 @@ class DoubleColumnStatsData { DoubleColumnStatsData(const DoubleColumnStatsData&); DoubleColumnStatsData& operator=(const DoubleColumnStatsData&); - DoubleColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0) { + DoubleColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0), bitVectors() { } virtual ~DoubleColumnStatsData() throw(); @@ -2433,6 +2447,7 @@ class DoubleColumnStatsData { double highValue; int64_t numNulls; int64_t numDVs; + std::string bitVectors; _DoubleColumnStatsData__isset __isset; @@ -2444,6 +2459,8 @@ class DoubleColumnStatsData { void __set_numDVs(const int64_t val); + void __set_bitVectors(const std::string& val); + bool operator == (const DoubleColumnStatsData & rhs) const { if (__isset.lowValue != rhs.__isset.lowValue) @@ -2458,6 +2475,10 @@ class DoubleColumnStatsData { return false; if (!(numDVs == rhs.numDVs)) return false; + if (__isset.bitVectors != rhs.__isset.bitVectors) + return false; + else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors)) + return false; return true; } bool operator != (const DoubleColumnStatsData &rhs) const { @@ -2481,9 +2502,10 @@ inline std::ostream& operator<<(std::ostream& out, const DoubleColumnStatsData& } typedef struct _LongColumnStatsData__isset { - _LongColumnStatsData__isset() : lowValue(false), highValue(false) {} + _LongColumnStatsData__isset() : 
lowValue(false), highValue(false), bitVectors(false) {} bool lowValue :1; bool highValue :1; + bool bitVectors :1; } _LongColumnStatsData__isset; class LongColumnStatsData { @@ -2491,7 +2513,7 @@ class LongColumnStatsData { LongColumnStatsData(const LongColumnStatsData&); LongColumnStatsData& operator=(const LongColumnStatsData&); - LongColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0) { + LongColumnStatsData() : lowValue(0), highValue(0), numNulls(0), numDVs(0), bitVectors() { } virtual ~LongColumnStatsData() throw(); @@ -2499,6 +2521,7 @@ class LongColumnStatsData { int64_t highValue; int64_t numNulls; int64_t numDVs; + std::string bitVectors; _LongColumnStatsData__isset __isset; @@ -2510,6 +2533,8 @@ class LongColumnStatsData { void __set_numDVs(const int64_t val); + void __set_bitVectors(const std::string& val); + bool operator == (const LongColumnStatsData & rhs) const { if (__isset.lowValue != rhs.__isset.lowValue) @@ -2524,6 +2549,10 @@ class LongColumnStatsData { return false; if (!(numDVs == rhs.numDVs)) return false; + if (__isset.bitVectors != rhs.__isset.bitVectors) + return false; + else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors)) + return false; return true; } bool operator != (const LongColumnStatsData &rhs) const { @@ -2546,13 +2575,17 @@ inline std::ostream& operator<<(std::ostream& out, const LongColumnStatsData& ob return out; } +typedef struct _StringColumnStatsData__isset { + _StringColumnStatsData__isset() : bitVectors(false) {} + bool bitVectors :1; +} _StringColumnStatsData__isset; class StringColumnStatsData { public: StringColumnStatsData(const StringColumnStatsData&); StringColumnStatsData& operator=(const StringColumnStatsData&); - StringColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0), numDVs(0) { + StringColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0), numDVs(0), bitVectors() { } virtual ~StringColumnStatsData() throw(); @@ -2560,6 +2593,9 @@ class StringColumnStatsData { double avgColLen; int64_t numNulls; int64_t numDVs; + std::string bitVectors; + + _StringColumnStatsData__isset __isset; void __set_maxColLen(const int64_t val); @@ -2569,6 +2605,8 @@ class StringColumnStatsData { void __set_numDVs(const int64_t val); + void __set_bitVectors(const std::string& val); + bool operator == (const StringColumnStatsData & rhs) const { if (!(maxColLen == rhs.maxColLen)) @@ -2579,6 +2617,10 @@ class StringColumnStatsData { return false; if (!(numDVs == rhs.numDVs)) return false; + if (__isset.bitVectors != rhs.__isset.bitVectors) + return false; + else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors)) + return false; return true; } bool operator != (const StringColumnStatsData &rhs) const { @@ -2601,19 +2643,26 @@ inline std::ostream& operator<<(std::ostream& out, const StringColumnStatsData& return out; } +typedef struct _BinaryColumnStatsData__isset { + _BinaryColumnStatsData__isset() : bitVectors(false) {} + bool bitVectors :1; +} _BinaryColumnStatsData__isset; class BinaryColumnStatsData { public: BinaryColumnStatsData(const BinaryColumnStatsData&); BinaryColumnStatsData& operator=(const BinaryColumnStatsData&); - BinaryColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0) { + BinaryColumnStatsData() : maxColLen(0), avgColLen(0), numNulls(0), bitVectors() { } virtual ~BinaryColumnStatsData() throw(); int64_t maxColLen; double avgColLen; int64_t numNulls; + std::string bitVectors; + + _BinaryColumnStatsData__isset __isset; void __set_maxColLen(const int64_t val); @@ -2621,6 +2670,8 @@ class 
BinaryColumnStatsData { void __set_numNulls(const int64_t val); + void __set_bitVectors(const std::string& val); + bool operator == (const BinaryColumnStatsData & rhs) const { if (!(maxColLen == rhs.maxColLen)) @@ -2629,6 +2680,10 @@ class BinaryColumnStatsData { return false; if (!(numNulls == rhs.numNulls)) return false; + if (__isset.bitVectors != rhs.__isset.bitVectors) + return false; + else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors)) + return false; return true; } bool operator != (const BinaryColumnStatsData &rhs) const { @@ -2697,9 +2752,10 @@ inline std::ostream& operator<<(std::ostream& out, const Decimal& obj) } typedef struct _DecimalColumnStatsData__isset { - _DecimalColumnStatsData__isset() : lowValue(false), highValue(false) {} + _DecimalColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {} bool lowValue :1; bool highValue :1; + bool bitVectors :1; } _DecimalColumnStatsData__isset; class DecimalColumnStatsData { @@ -2707,7 +2763,7 @@ class DecimalColumnStatsData { DecimalColumnStatsData(const DecimalColumnStatsData&); DecimalColumnStatsData& operator=(const DecimalColumnStatsData&); - DecimalColumnStatsData() : numNulls(0), numDVs(0) { + DecimalColumnStatsData() : numNulls(0), numDVs(0), bitVectors() { } virtual ~DecimalColumnStatsData() throw(); @@ -2715,6 +2771,7 @@ class DecimalColumnStatsData { Decimal highValue; int64_t numNulls; int64_t numDVs; + std::string bitVectors; _DecimalColumnStatsData__isset __isset; @@ -2726,6 +2783,8 @@ class DecimalColumnStatsData { void __set_numDVs(const int64_t val); + void __set_bitVectors(const std::string& val); + bool operator == (const DecimalColumnStatsData & rhs) const { if (__isset.lowValue != rhs.__isset.lowValue) @@ -2740,6 +2799,10 @@ class DecimalColumnStatsData { return false; if (!(numDVs == rhs.numDVs)) return false; + if (__isset.bitVectors != rhs.__isset.bitVectors) + return false; + else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors)) + return false; return true; } bool operator != (const DecimalColumnStatsData &rhs) const { @@ -2803,9 +2866,10 @@ inline std::ostream& operator<<(std::ostream& out, const Date& obj) } typedef struct _DateColumnStatsData__isset { - _DateColumnStatsData__isset() : lowValue(false), highValue(false) {} + _DateColumnStatsData__isset() : lowValue(false), highValue(false), bitVectors(false) {} bool lowValue :1; bool highValue :1; + bool bitVectors :1; } _DateColumnStatsData__isset; class DateColumnStatsData { @@ -2813,7 +2877,7 @@ class DateColumnStatsData { DateColumnStatsData(const DateColumnStatsData&); DateColumnStatsData& operator=(const DateColumnStatsData&); - DateColumnStatsData() : numNulls(0), numDVs(0) { + DateColumnStatsData() : numNulls(0), numDVs(0), bitVectors() { } virtual ~DateColumnStatsData() throw(); @@ -2821,6 +2885,7 @@ class DateColumnStatsData { Date highValue; int64_t numNulls; int64_t numDVs; + std::string bitVectors; _DateColumnStatsData__isset __isset; @@ -2832,6 +2897,8 @@ class DateColumnStatsData { void __set_numDVs(const int64_t val); + void __set_bitVectors(const std::string& val); + bool operator == (const DateColumnStatsData & rhs) const { if (__isset.lowValue != rhs.__isset.lowValue) @@ -2846,6 +2913,10 @@ class DateColumnStatsData { return false; if (!(numDVs == rhs.numDVs)) return false; + if (__isset.bitVectors != rhs.__isset.bitVectors) + return false; + else if (__isset.bitVectors && !(bitVectors == rhs.bitVectors)) + return false; return true; } bool operator != (const DateColumnStatsData &rhs) const 
{ diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java index 84e393c..eeb5105 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BinaryColumnStatsData.java @@ -41,6 +41,7 @@ private static final org.apache.thrift.protocol.TField MAX_COL_LEN_FIELD_DESC = new org.apache.thrift.protocol.TField("maxColLen", org.apache.thrift.protocol.TType.I64, (short)1); private static final org.apache.thrift.protocol.TField AVG_COL_LEN_FIELD_DESC = new org.apache.thrift.protocol.TField("avgColLen", org.apache.thrift.protocol.TType.DOUBLE, (short)2); private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3); + private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)4); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -51,12 +52,14 @@ private long maxColLen; // required private double avgColLen; // required private long numNulls; // required + private String bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { MAX_COL_LEN((short)1, "maxColLen"), AVG_COL_LEN((short)2, "avgColLen"), - NUM_NULLS((short)3, "numNulls"); + NUM_NULLS((short)3, "numNulls"), + BIT_VECTORS((short)4, "bitVectors"); private static final Map byName = new HashMap(); @@ -77,6 +80,8 @@ public static _Fields findByThriftId(int fieldId) { return AVG_COL_LEN; case 3: // NUM_NULLS return NUM_NULLS; + case 4: // BIT_VECTORS + return BIT_VECTORS; default: return null; } @@ -121,6 +126,7 @@ public String getFieldName() { private static final int __AVGCOLLEN_ISSET_ID = 1; private static final int __NUMNULLS_ISSET_ID = 2; private byte __isset_bitfield = 0; + private static final _Fields optionals[] = {_Fields.BIT_VECTORS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -130,6 +136,8 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(BinaryColumnStatsData.class, metaDataMap); } @@ -159,6 +167,9 @@ public BinaryColumnStatsData(BinaryColumnStatsData other) { this.maxColLen = other.maxColLen; this.avgColLen = other.avgColLen; this.numNulls = other.numNulls; + if 
(other.isSetBitVectors()) { + this.bitVectors = other.bitVectors; + } } public BinaryColumnStatsData deepCopy() { @@ -173,6 +184,7 @@ public void clear() { this.avgColLen = 0.0; setNumNullsIsSet(false); this.numNulls = 0; + this.bitVectors = null; } public long getMaxColLen() { @@ -241,6 +253,29 @@ public void setNumNullsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMNULLS_ISSET_ID, value); } + public String getBitVectors() { + return this.bitVectors; + } + + public void setBitVectors(String bitVectors) { + this.bitVectors = bitVectors; + } + + public void unsetBitVectors() { + this.bitVectors = null; + } + + /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */ + public boolean isSetBitVectors() { + return this.bitVectors != null; + } + + public void setBitVectorsIsSet(boolean value) { + if (!value) { + this.bitVectors = null; + } + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case MAX_COL_LEN: @@ -267,6 +302,14 @@ public void setFieldValue(_Fields field, Object value) { } break; + case BIT_VECTORS: + if (value == null) { + unsetBitVectors(); + } else { + setBitVectors((String)value); + } + break; + } } @@ -281,6 +324,9 @@ public Object getFieldValue(_Fields field) { case NUM_NULLS: return getNumNulls(); + case BIT_VECTORS: + return getBitVectors(); + } throw new IllegalStateException(); } @@ -298,6 +344,8 @@ public boolean isSet(_Fields field) { return isSetAvgColLen(); case NUM_NULLS: return isSetNumNulls(); + case BIT_VECTORS: + return isSetBitVectors(); } throw new IllegalStateException(); } @@ -342,6 +390,15 @@ public boolean equals(BinaryColumnStatsData that) { return false; } + boolean this_present_bitVectors = true && this.isSetBitVectors(); + boolean that_present_bitVectors = true && that.isSetBitVectors(); + if (this_present_bitVectors || that_present_bitVectors) { + if (!(this_present_bitVectors && that_present_bitVectors)) + return false; + if (!this.bitVectors.equals(that.bitVectors)) + return false; + } + return true; } @@ -364,6 +421,11 @@ public int hashCode() { if (present_numNulls) list.add(numNulls); + boolean present_bitVectors = true && (isSetBitVectors()); + list.add(present_bitVectors); + if (present_bitVectors) + list.add(bitVectors); + return list.hashCode(); } @@ -405,6 +467,16 @@ public int compareTo(BinaryColumnStatsData other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBitVectors()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -436,6 +508,16 @@ public String toString() { sb.append("numNulls:"); sb.append(this.numNulls); first = false; + if (isSetBitVectors()) { + if (!first) sb.append(", "); + sb.append("bitVectors:"); + if (this.bitVectors == null) { + sb.append("null"); + } else { + sb.append(this.bitVectors); + } + first = false; + } sb.append(")"); return sb.toString(); } @@ -517,6 +599,14 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, BinaryColumnStatsDa org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 4: // BIT_VECTORS + if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } else { + 
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -539,6 +629,13 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, BinaryColumnStatsD oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC); oprot.writeI64(struct.numNulls); oprot.writeFieldEnd(); + if (struct.bitVectors != null) { + if (struct.isSetBitVectors()) { + oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); + oprot.writeString(struct.bitVectors); + oprot.writeFieldEnd(); + } + } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -559,6 +656,14 @@ public void write(org.apache.thrift.protocol.TProtocol prot, BinaryColumnStatsDa oprot.writeI64(struct.maxColLen); oprot.writeDouble(struct.avgColLen); oprot.writeI64(struct.numNulls); + BitSet optionals = new BitSet(); + if (struct.isSetBitVectors()) { + optionals.set(0); + } + oprot.writeBitSet(optionals, 1); + if (struct.isSetBitVectors()) { + oprot.writeString(struct.bitVectors); + } } @Override @@ -570,6 +675,11 @@ public void read(org.apache.thrift.protocol.TProtocol prot, BinaryColumnStatsDat struct.setAvgColLenIsSet(true); struct.numNulls = iprot.readI64(); struct.setNumNullsIsSet(true); + BitSet incoming = iprot.readBitSet(1); + if (incoming.get(0)) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } } } diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java index 6aa4668..de39d21 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/BooleanColumnStatsData.java @@ -41,6 +41,7 @@ private static final org.apache.thrift.protocol.TField NUM_TRUES_FIELD_DESC = new org.apache.thrift.protocol.TField("numTrues", org.apache.thrift.protocol.TType.I64, (short)1); private static final org.apache.thrift.protocol.TField NUM_FALSES_FIELD_DESC = new org.apache.thrift.protocol.TField("numFalses", org.apache.thrift.protocol.TType.I64, (short)2); private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3); + private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)4); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -51,12 +52,14 @@ private long numTrues; // required private long numFalses; // required private long numNulls; // required + private String bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
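As a quick orientation to the accessor pattern generated above, the following is a minimal Java sketch (not part of the patch) of populating the new optional field on BinaryColumnStatsData. The class name BitVectorsAccessorSketch and the payload string "encoded-ndv-sketch" are illustrative placeholders; the sketch only assumes the hive-metastore API classes and a Thrift runtime on the classpath.

import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;

public class BitVectorsAccessorSketch {
  public static void main(String[] args) {
    // Required fields first: maxColLen, avgColLen, numNulls.
    BinaryColumnStatsData stats = new BinaryColumnStatsData(128L, 42.5, 7L);

    // bitVectors starts out unset; isSetBitVectors() is the null check generated above.
    assert !stats.isSetBitVectors();

    // "encoded-ndv-sketch" stands in for whatever serialized estimator string Hive stores here.
    stats.setBitVectors("encoded-ndv-sketch");
    assert stats.isSetBitVectors();

    // unsetBitVectors() nulls the field again, so it will not be written on the wire.
    stats.unsetBitVectors();
    assert stats.getBitVectors() == null;
  }
}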
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { NUM_TRUES((short)1, "numTrues"), NUM_FALSES((short)2, "numFalses"), - NUM_NULLS((short)3, "numNulls"); + NUM_NULLS((short)3, "numNulls"), + BIT_VECTORS((short)4, "bitVectors"); private static final Map byName = new HashMap(); @@ -77,6 +80,8 @@ public static _Fields findByThriftId(int fieldId) { return NUM_FALSES; case 3: // NUM_NULLS return NUM_NULLS; + case 4: // BIT_VECTORS + return BIT_VECTORS; default: return null; } @@ -121,6 +126,7 @@ public String getFieldName() { private static final int __NUMFALSES_ISSET_ID = 1; private static final int __NUMNULLS_ISSET_ID = 2; private byte __isset_bitfield = 0; + private static final _Fields optionals[] = {_Fields.BIT_VECTORS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -130,6 +136,8 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(BooleanColumnStatsData.class, metaDataMap); } @@ -159,6 +167,9 @@ public BooleanColumnStatsData(BooleanColumnStatsData other) { this.numTrues = other.numTrues; this.numFalses = other.numFalses; this.numNulls = other.numNulls; + if (other.isSetBitVectors()) { + this.bitVectors = other.bitVectors; + } } public BooleanColumnStatsData deepCopy() { @@ -173,6 +184,7 @@ public void clear() { this.numFalses = 0; setNumNullsIsSet(false); this.numNulls = 0; + this.bitVectors = null; } public long getNumTrues() { @@ -241,6 +253,29 @@ public void setNumNullsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMNULLS_ISSET_ID, value); } + public String getBitVectors() { + return this.bitVectors; + } + + public void setBitVectors(String bitVectors) { + this.bitVectors = bitVectors; + } + + public void unsetBitVectors() { + this.bitVectors = null; + } + + /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */ + public boolean isSetBitVectors() { + return this.bitVectors != null; + } + + public void setBitVectorsIsSet(boolean value) { + if (!value) { + this.bitVectors = null; + } + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case NUM_TRUES: @@ -267,6 +302,14 @@ public void setFieldValue(_Fields field, Object value) { } break; + case BIT_VECTORS: + if (value == null) { + unsetBitVectors(); + } else { + setBitVectors((String)value); + } + break; + } } @@ -281,6 +324,9 @@ public Object getFieldValue(_Fields field) { case NUM_NULLS: return getNumNulls(); + case BIT_VECTORS: + return getBitVectors(); + } throw new IllegalStateException(); } @@ -298,6 +344,8 @@ public boolean isSet(_Fields field) { return isSetNumFalses(); case NUM_NULLS: return isSetNumNulls(); + case BIT_VECTORS: + return isSetBitVectors(); } throw new 
IllegalStateException(); } @@ -342,6 +390,15 @@ public boolean equals(BooleanColumnStatsData that) { return false; } + boolean this_present_bitVectors = true && this.isSetBitVectors(); + boolean that_present_bitVectors = true && that.isSetBitVectors(); + if (this_present_bitVectors || that_present_bitVectors) { + if (!(this_present_bitVectors && that_present_bitVectors)) + return false; + if (!this.bitVectors.equals(that.bitVectors)) + return false; + } + return true; } @@ -364,6 +421,11 @@ public int hashCode() { if (present_numNulls) list.add(numNulls); + boolean present_bitVectors = true && (isSetBitVectors()); + list.add(present_bitVectors); + if (present_bitVectors) + list.add(bitVectors); + return list.hashCode(); } @@ -405,6 +467,16 @@ public int compareTo(BooleanColumnStatsData other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBitVectors()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -436,6 +508,16 @@ public String toString() { sb.append("numNulls:"); sb.append(this.numNulls); first = false; + if (isSetBitVectors()) { + if (!first) sb.append(", "); + sb.append("bitVectors:"); + if (this.bitVectors == null) { + sb.append("null"); + } else { + sb.append(this.bitVectors); + } + first = false; + } sb.append(")"); return sb.toString(); } @@ -517,6 +599,14 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, BooleanColumnStatsD org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 4: // BIT_VECTORS + if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -539,6 +629,13 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, BooleanColumnStats oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC); oprot.writeI64(struct.numNulls); oprot.writeFieldEnd(); + if (struct.bitVectors != null) { + if (struct.isSetBitVectors()) { + oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); + oprot.writeString(struct.bitVectors); + oprot.writeFieldEnd(); + } + } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -559,6 +656,14 @@ public void write(org.apache.thrift.protocol.TProtocol prot, BooleanColumnStatsD oprot.writeI64(struct.numTrues); oprot.writeI64(struct.numFalses); oprot.writeI64(struct.numNulls); + BitSet optionals = new BitSet(); + if (struct.isSetBitVectors()) { + optionals.set(0); + } + oprot.writeBitSet(optionals, 1); + if (struct.isSetBitVectors()) { + oprot.writeString(struct.bitVectors); + } } @Override @@ -570,6 +675,11 @@ public void read(org.apache.thrift.protocol.TProtocol prot, BooleanColumnStatsDa struct.setNumFalsesIsSet(true); struct.numNulls = iprot.readI64(); struct.setNumNullsIsSet(true); + BitSet incoming = iprot.readBitSet(1); + if (incoming.get(0)) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } } } diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java index 2ebb811..edc87a1 100644 --- 
a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DateColumnStatsData.java @@ -42,6 +42,7 @@ private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.STRUCT, (short)2); private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3); private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4); + private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -53,13 +54,15 @@ private Date highValue; // optional private long numNulls; // required private long numDVs; // required + private String bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { LOW_VALUE((short)1, "lowValue"), HIGH_VALUE((short)2, "highValue"), NUM_NULLS((short)3, "numNulls"), - NUM_DVS((short)4, "numDVs"); + NUM_DVS((short)4, "numDVs"), + BIT_VECTORS((short)5, "bitVectors"); private static final Map byName = new HashMap(); @@ -82,6 +85,8 @@ public static _Fields findByThriftId(int fieldId) { return NUM_NULLS; case 4: // NUM_DVS return NUM_DVS; + case 5: // BIT_VECTORS + return BIT_VECTORS; default: return null; } @@ -125,7 +130,7 @@ public String getFieldName() { private static final int __NUMNULLS_ISSET_ID = 0; private static final int __NUMDVS_ISSET_ID = 1; private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE}; + private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -137,6 +142,8 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DateColumnStatsData.class, metaDataMap); } @@ -168,6 +175,9 @@ public DateColumnStatsData(DateColumnStatsData other) { } this.numNulls = other.numNulls; this.numDVs = other.numDVs; + if (other.isSetBitVectors()) { + this.bitVectors = other.bitVectors; + } } public DateColumnStatsData deepCopy() { @@ -182,6 +192,7 @@ public void clear() { this.numNulls = 0; setNumDVsIsSet(false); 
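The copy constructor above carries bitVectors across only when it is set on the source object, and clear() resets it to unset. A small sketch of that behavior, assuming nothing beyond the generated API (the class name and payload string are again invented for illustration):

import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;

public class BitVectorsCopySketch {
  public static void main(String[] args) {
    // Required fields for DateColumnStatsData are numNulls and numDVs.
    DateColumnStatsData original = new DateColumnStatsData(3L, 1000L);
    original.setBitVectors("encoded-ndv-sketch");   // hypothetical payload

    // deepCopy() delegates to the copy constructor, which copies bitVectors
    // only when isSetBitVectors() is true on the source.
    DateColumnStatsData copy = original.deepCopy();
    assert copy.isSetBitVectors();
    assert copy.getBitVectors().equals(original.getBitVectors());

    // clear() returns the optional field to the unset (null) state shown above.
    copy.clear();
    assert !copy.isSetBitVectors();
  }
}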
this.numDVs = 0; + this.bitVectors = null; } public Date getLowValue() { @@ -274,6 +285,29 @@ public void setNumDVsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); } + public String getBitVectors() { + return this.bitVectors; + } + + public void setBitVectors(String bitVectors) { + this.bitVectors = bitVectors; + } + + public void unsetBitVectors() { + this.bitVectors = null; + } + + /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */ + public boolean isSetBitVectors() { + return this.bitVectors != null; + } + + public void setBitVectorsIsSet(boolean value) { + if (!value) { + this.bitVectors = null; + } + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case LOW_VALUE: @@ -308,6 +342,14 @@ public void setFieldValue(_Fields field, Object value) { } break; + case BIT_VECTORS: + if (value == null) { + unsetBitVectors(); + } else { + setBitVectors((String)value); + } + break; + } } @@ -325,6 +367,9 @@ public Object getFieldValue(_Fields field) { case NUM_DVS: return getNumDVs(); + case BIT_VECTORS: + return getBitVectors(); + } throw new IllegalStateException(); } @@ -344,6 +389,8 @@ public boolean isSet(_Fields field) { return isSetNumNulls(); case NUM_DVS: return isSetNumDVs(); + case BIT_VECTORS: + return isSetBitVectors(); } throw new IllegalStateException(); } @@ -397,6 +444,15 @@ public boolean equals(DateColumnStatsData that) { return false; } + boolean this_present_bitVectors = true && this.isSetBitVectors(); + boolean that_present_bitVectors = true && that.isSetBitVectors(); + if (this_present_bitVectors || that_present_bitVectors) { + if (!(this_present_bitVectors && that_present_bitVectors)) + return false; + if (!this.bitVectors.equals(that.bitVectors)) + return false; + } + return true; } @@ -424,6 +480,11 @@ public int hashCode() { if (present_numDVs) list.add(numDVs); + boolean present_bitVectors = true && (isSetBitVectors()); + list.add(present_bitVectors); + if (present_bitVectors) + list.add(bitVectors); + return list.hashCode(); } @@ -475,6 +536,16 @@ public int compareTo(DateColumnStatsData other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBitVectors()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -522,6 +593,16 @@ public String toString() { sb.append("numDVs:"); sb.append(this.numDVs); first = false; + if (isSetBitVectors()) { + if (!first) sb.append(", "); + sb.append("bitVectors:"); + if (this.bitVectors == null) { + sb.append("null"); + } else { + sb.append(this.bitVectors); + } + first = false; + } sb.append(")"); return sb.toString(); } @@ -615,6 +696,14 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, DateColumnStatsData org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 5: // BIT_VECTORS + if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -648,6 +737,13 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, DateColumnStatsDat 
oprot.writeFieldBegin(NUM_DVS_FIELD_DESC); oprot.writeI64(struct.numDVs); oprot.writeFieldEnd(); + if (struct.bitVectors != null) { + if (struct.isSetBitVectors()) { + oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); + oprot.writeString(struct.bitVectors); + oprot.writeFieldEnd(); + } + } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -674,13 +770,19 @@ public void write(org.apache.thrift.protocol.TProtocol prot, DateColumnStatsData if (struct.isSetHighValue()) { optionals.set(1); } - oprot.writeBitSet(optionals, 2); + if (struct.isSetBitVectors()) { + optionals.set(2); + } + oprot.writeBitSet(optionals, 3); if (struct.isSetLowValue()) { struct.lowValue.write(oprot); } if (struct.isSetHighValue()) { struct.highValue.write(oprot); } + if (struct.isSetBitVectors()) { + oprot.writeString(struct.bitVectors); + } } @Override @@ -690,7 +792,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, DateColumnStatsData struct.setNumNullsIsSet(true); struct.numDVs = iprot.readI64(); struct.setNumDVsIsSet(true); - BitSet incoming = iprot.readBitSet(2); + BitSet incoming = iprot.readBitSet(3); if (incoming.get(0)) { struct.lowValue = new Date(); struct.lowValue.read(iprot); @@ -701,6 +803,10 @@ public void read(org.apache.thrift.protocol.TProtocol prot, DateColumnStatsData struct.highValue.read(iprot); struct.setHighValueIsSet(true); } + if (incoming.get(2)) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } } } diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java index 720176a..ec363dc 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DecimalColumnStatsData.java @@ -42,6 +42,7 @@ private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.STRUCT, (short)2); private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3); private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4); + private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -53,13 +54,15 @@ private Decimal highValue; // optional private long numNulls; // required private long numDVs; // required + private String bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
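The TupleScheme change above grows the optionals BitSet from 2 bits (lowValue, highValue) to 3 (plus bitVectors), so code on both sides of a tuple-encoded exchange has to be regenerated from the updated IDL. A hedged round-trip sketch, assuming libthrift's TMemoryBuffer and TTupleProtocol are available; the class name and payload string are placeholders:

import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
import org.apache.thrift.protocol.TTupleProtocol;
import org.apache.thrift.transport.TMemoryBuffer;

public class BitVectorsTupleSketch {
  public static void main(String[] args) throws Exception {
    DateColumnStatsData written = new DateColumnStatsData(0L, 5000L);
    written.setBitVectors("encoded-ndv-sketch");   // hypothetical payload

    // TTupleProtocol selects the TupleScheme, which now writes the 3-bit optionals BitSet.
    TMemoryBuffer buffer = new TMemoryBuffer(256);
    written.write(new TTupleProtocol(buffer));

    // A reader built from the same updated IDL decodes the third optional bit as bitVectors.
    DateColumnStatsData read = new DateColumnStatsData();
    read.read(new TTupleProtocol(buffer));
    assert read.isSetBitVectors();
  }
}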
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { LOW_VALUE((short)1, "lowValue"), HIGH_VALUE((short)2, "highValue"), NUM_NULLS((short)3, "numNulls"), - NUM_DVS((short)4, "numDVs"); + NUM_DVS((short)4, "numDVs"), + BIT_VECTORS((short)5, "bitVectors"); private static final Map byName = new HashMap(); @@ -82,6 +85,8 @@ public static _Fields findByThriftId(int fieldId) { return NUM_NULLS; case 4: // NUM_DVS return NUM_DVS; + case 5: // BIT_VECTORS + return BIT_VECTORS; default: return null; } @@ -125,7 +130,7 @@ public String getFieldName() { private static final int __NUMNULLS_ISSET_ID = 0; private static final int __NUMDVS_ISSET_ID = 1; private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE}; + private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -137,6 +142,8 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DecimalColumnStatsData.class, metaDataMap); } @@ -168,6 +175,9 @@ public DecimalColumnStatsData(DecimalColumnStatsData other) { } this.numNulls = other.numNulls; this.numDVs = other.numDVs; + if (other.isSetBitVectors()) { + this.bitVectors = other.bitVectors; + } } public DecimalColumnStatsData deepCopy() { @@ -182,6 +192,7 @@ public void clear() { this.numNulls = 0; setNumDVsIsSet(false); this.numDVs = 0; + this.bitVectors = null; } public Decimal getLowValue() { @@ -274,6 +285,29 @@ public void setNumDVsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); } + public String getBitVectors() { + return this.bitVectors; + } + + public void setBitVectors(String bitVectors) { + this.bitVectors = bitVectors; + } + + public void unsetBitVectors() { + this.bitVectors = null; + } + + /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */ + public boolean isSetBitVectors() { + return this.bitVectors != null; + } + + public void setBitVectorsIsSet(boolean value) { + if (!value) { + this.bitVectors = null; + } + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case LOW_VALUE: @@ -308,6 +342,14 @@ public void setFieldValue(_Fields field, Object value) { } break; + case BIT_VECTORS: + if (value == null) { + unsetBitVectors(); + } else { + setBitVectors((String)value); + } + break; + } } @@ -325,6 +367,9 @@ public Object getFieldValue(_Fields field) { case NUM_DVS: return getNumDVs(); + case BIT_VECTORS: + return getBitVectors(); + } throw new IllegalStateException(); } @@ -344,6 +389,8 @@ public boolean isSet(_Fields field) { return isSetNumNulls(); case NUM_DVS: 
return isSetNumDVs(); + case BIT_VECTORS: + return isSetBitVectors(); } throw new IllegalStateException(); } @@ -397,6 +444,15 @@ public boolean equals(DecimalColumnStatsData that) { return false; } + boolean this_present_bitVectors = true && this.isSetBitVectors(); + boolean that_present_bitVectors = true && that.isSetBitVectors(); + if (this_present_bitVectors || that_present_bitVectors) { + if (!(this_present_bitVectors && that_present_bitVectors)) + return false; + if (!this.bitVectors.equals(that.bitVectors)) + return false; + } + return true; } @@ -424,6 +480,11 @@ public int hashCode() { if (present_numDVs) list.add(numDVs); + boolean present_bitVectors = true && (isSetBitVectors()); + list.add(present_bitVectors); + if (present_bitVectors) + list.add(bitVectors); + return list.hashCode(); } @@ -475,6 +536,16 @@ public int compareTo(DecimalColumnStatsData other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBitVectors()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -522,6 +593,16 @@ public String toString() { sb.append("numDVs:"); sb.append(this.numDVs); first = false; + if (isSetBitVectors()) { + if (!first) sb.append(", "); + sb.append("bitVectors:"); + if (this.bitVectors == null) { + sb.append("null"); + } else { + sb.append(this.bitVectors); + } + first = false; + } sb.append(")"); return sb.toString(); } @@ -615,6 +696,14 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, DecimalColumnStatsD org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 5: // BIT_VECTORS + if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -648,6 +737,13 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, DecimalColumnStats oprot.writeFieldBegin(NUM_DVS_FIELD_DESC); oprot.writeI64(struct.numDVs); oprot.writeFieldEnd(); + if (struct.bitVectors != null) { + if (struct.isSetBitVectors()) { + oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); + oprot.writeString(struct.bitVectors); + oprot.writeFieldEnd(); + } + } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -674,13 +770,19 @@ public void write(org.apache.thrift.protocol.TProtocol prot, DecimalColumnStatsD if (struct.isSetHighValue()) { optionals.set(1); } - oprot.writeBitSet(optionals, 2); + if (struct.isSetBitVectors()) { + optionals.set(2); + } + oprot.writeBitSet(optionals, 3); if (struct.isSetLowValue()) { struct.lowValue.write(oprot); } if (struct.isSetHighValue()) { struct.highValue.write(oprot); } + if (struct.isSetBitVectors()) { + oprot.writeString(struct.bitVectors); + } } @Override @@ -690,7 +792,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, DecimalColumnStatsDa struct.setNumNullsIsSet(true); struct.numDVs = iprot.readI64(); struct.setNumDVsIsSet(true); - BitSet incoming = iprot.readBitSet(2); + BitSet incoming = iprot.readBitSet(3); if (incoming.get(0)) { struct.lowValue = new Decimal(); struct.lowValue.read(iprot); @@ -701,6 +803,10 @@ public void read(org.apache.thrift.protocol.TProtocol prot, DecimalColumnStatsDa 
struct.highValue.read(iprot); struct.setHighValueIsSet(true); } + if (incoming.get(2)) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } } } diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java index 5d48b5d..e3340e4 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/DoubleColumnStatsData.java @@ -42,6 +42,7 @@ private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.DOUBLE, (short)2); private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3); private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4); + private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -53,13 +54,15 @@ private double highValue; // optional private long numNulls; // required private long numDVs; // required + private String bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { LOW_VALUE((short)1, "lowValue"), HIGH_VALUE((short)2, "highValue"), NUM_NULLS((short)3, "numNulls"), - NUM_DVS((short)4, "numDVs"); + NUM_DVS((short)4, "numDVs"), + BIT_VECTORS((short)5, "bitVectors"); private static final Map byName = new HashMap(); @@ -82,6 +85,8 @@ public static _Fields findByThriftId(int fieldId) { return NUM_NULLS; case 4: // NUM_DVS return NUM_DVS; + case 5: // BIT_VECTORS + return BIT_VECTORS; default: return null; } @@ -127,7 +132,7 @@ public String getFieldName() { private static final int __NUMNULLS_ISSET_ID = 2; private static final int __NUMDVS_ISSET_ID = 3; private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE}; + private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -139,6 +144,8 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); 
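One practical consequence of the isSetBitVectors() guard in the StandardScheme writers is that the new field adds bytes to a serialized struct only when it is actually populated, so existing callers that never set it see identical payloads. A sketch under that assumption, using libthrift's TSerializer with the binary protocol (class name and payload string are placeholders):

import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TBinaryProtocol;

public class BitVectorsWireSizeSketch {
  public static void main(String[] args) throws Exception {
    TSerializer serializer = new TSerializer(new TBinaryProtocol.Factory());

    // Required fields are numNulls and numDVs; lowValue/highValue/bitVectors stay unset.
    DoubleColumnStatsData withoutSketch = new DoubleColumnStatsData(1L, 250L);
    int plainSize = serializer.serialize(withoutSketch).length;

    DoubleColumnStatsData withSketch = new DoubleColumnStatsData(1L, 250L);
    withSketch.setBitVectors("encoded-ndv-sketch");   // hypothetical payload
    int extendedSize = serializer.serialize(withSketch).length;

    // Only the struct that sets bitVectors pays for field 5 on the wire.
    System.out.println("without bitVectors: " + plainSize + " bytes, with: " + extendedSize);
  }
}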
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(DoubleColumnStatsData.class, metaDataMap); } @@ -166,6 +173,9 @@ public DoubleColumnStatsData(DoubleColumnStatsData other) { this.highValue = other.highValue; this.numNulls = other.numNulls; this.numDVs = other.numDVs; + if (other.isSetBitVectors()) { + this.bitVectors = other.bitVectors; + } } public DoubleColumnStatsData deepCopy() { @@ -182,6 +192,7 @@ public void clear() { this.numNulls = 0; setNumDVsIsSet(false); this.numDVs = 0; + this.bitVectors = null; } public double getLowValue() { @@ -272,6 +283,29 @@ public void setNumDVsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); } + public String getBitVectors() { + return this.bitVectors; + } + + public void setBitVectors(String bitVectors) { + this.bitVectors = bitVectors; + } + + public void unsetBitVectors() { + this.bitVectors = null; + } + + /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */ + public boolean isSetBitVectors() { + return this.bitVectors != null; + } + + public void setBitVectorsIsSet(boolean value) { + if (!value) { + this.bitVectors = null; + } + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case LOW_VALUE: @@ -306,6 +340,14 @@ public void setFieldValue(_Fields field, Object value) { } break; + case BIT_VECTORS: + if (value == null) { + unsetBitVectors(); + } else { + setBitVectors((String)value); + } + break; + } } @@ -323,6 +365,9 @@ public Object getFieldValue(_Fields field) { case NUM_DVS: return getNumDVs(); + case BIT_VECTORS: + return getBitVectors(); + } throw new IllegalStateException(); } @@ -342,6 +387,8 @@ public boolean isSet(_Fields field) { return isSetNumNulls(); case NUM_DVS: return isSetNumDVs(); + case BIT_VECTORS: + return isSetBitVectors(); } throw new IllegalStateException(); } @@ -395,6 +442,15 @@ public boolean equals(DoubleColumnStatsData that) { return false; } + boolean this_present_bitVectors = true && this.isSetBitVectors(); + boolean that_present_bitVectors = true && that.isSetBitVectors(); + if (this_present_bitVectors || that_present_bitVectors) { + if (!(this_present_bitVectors && that_present_bitVectors)) + return false; + if (!this.bitVectors.equals(that.bitVectors)) + return false; + } + return true; } @@ -422,6 +478,11 @@ public int hashCode() { if (present_numDVs) list.add(numDVs); + boolean present_bitVectors = true && (isSetBitVectors()); + list.add(present_bitVectors); + if (present_bitVectors) + list.add(bitVectors); + return list.hashCode(); } @@ -473,6 +534,16 @@ public int compareTo(DoubleColumnStatsData other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBitVectors()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -512,6 +583,16 @@ public String toString() { sb.append("numDVs:"); sb.append(this.numDVs); first = false; + if (isSetBitVectors()) { + if (!first) sb.append(", "); + sb.append("bitVectors:"); + if (this.bitVectors == null) { + sb.append("null"); + } else { + sb.append(this.bitVectors); + } + first = false; + } sb.append(")"); return sb.toString(); } @@ -597,6 +678,14 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, DoubleColumnStatsDa org.apache.thrift.protocol.TProtocolUtil.skip(iprot, 
schemeField.type); } break; + case 5: // BIT_VECTORS + if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -626,6 +715,13 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, DoubleColumnStatsD oprot.writeFieldBegin(NUM_DVS_FIELD_DESC); oprot.writeI64(struct.numDVs); oprot.writeFieldEnd(); + if (struct.bitVectors != null) { + if (struct.isSetBitVectors()) { + oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); + oprot.writeString(struct.bitVectors); + oprot.writeFieldEnd(); + } + } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -652,13 +748,19 @@ public void write(org.apache.thrift.protocol.TProtocol prot, DoubleColumnStatsDa if (struct.isSetHighValue()) { optionals.set(1); } - oprot.writeBitSet(optionals, 2); + if (struct.isSetBitVectors()) { + optionals.set(2); + } + oprot.writeBitSet(optionals, 3); if (struct.isSetLowValue()) { oprot.writeDouble(struct.lowValue); } if (struct.isSetHighValue()) { oprot.writeDouble(struct.highValue); } + if (struct.isSetBitVectors()) { + oprot.writeString(struct.bitVectors); + } } @Override @@ -668,7 +770,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, DoubleColumnStatsDat struct.setNumNullsIsSet(true); struct.numDVs = iprot.readI64(); struct.setNumDVsIsSet(true); - BitSet incoming = iprot.readBitSet(2); + BitSet incoming = iprot.readBitSet(3); if (incoming.get(0)) { struct.lowValue = iprot.readDouble(); struct.setLowValueIsSet(true); @@ -677,6 +779,10 @@ public void read(org.apache.thrift.protocol.TProtocol prot, DoubleColumnStatsDat struct.highValue = iprot.readDouble(); struct.setHighValueIsSet(true); } + if (incoming.get(2)) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } } } diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java index 2f41c5a..4404706 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/LongColumnStatsData.java @@ -42,6 +42,7 @@ private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.I64, (short)2); private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3); private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4); + private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -53,13 +54,15 @@ private long highValue; // optional private long numNulls; // required private long numDVs; // required + private String bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
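The equals/hashCode/compareTo additions above treat bitVectors like the other optional fields: two stats objects match only if both leave it unset or both carry the same string. A minimal sketch of that semantics (class name and payload string are placeholders):

import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

public class BitVectorsEqualitySketch {
  public static void main(String[] args) {
    LongColumnStatsData a = new LongColumnStatsData(0L, 128L);   // numNulls, numDVs
    LongColumnStatsData b = new LongColumnStatsData(0L, 128L);

    // With bitVectors unset on both sides, the new equals/hashCode clauses are no-ops.
    assert a.equals(b) && a.hashCode() == b.hashCode();

    // Setting the field on only one side makes the structs differ,
    // and compareTo orders the unset struct before the set one.
    a.setBitVectors("encoded-ndv-sketch");   // hypothetical payload
    assert !a.equals(b);
    assert b.compareTo(a) < 0;
  }
}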
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { LOW_VALUE((short)1, "lowValue"), HIGH_VALUE((short)2, "highValue"), NUM_NULLS((short)3, "numNulls"), - NUM_DVS((short)4, "numDVs"); + NUM_DVS((short)4, "numDVs"), + BIT_VECTORS((short)5, "bitVectors"); private static final Map byName = new HashMap(); @@ -82,6 +85,8 @@ public static _Fields findByThriftId(int fieldId) { return NUM_NULLS; case 4: // NUM_DVS return NUM_DVS; + case 5: // BIT_VECTORS + return BIT_VECTORS; default: return null; } @@ -127,7 +132,7 @@ public String getFieldName() { private static final int __NUMNULLS_ISSET_ID = 2; private static final int __NUMDVS_ISSET_ID = 3; private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE}; + private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -139,6 +144,8 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(LongColumnStatsData.class, metaDataMap); } @@ -166,6 +173,9 @@ public LongColumnStatsData(LongColumnStatsData other) { this.highValue = other.highValue; this.numNulls = other.numNulls; this.numDVs = other.numDVs; + if (other.isSetBitVectors()) { + this.bitVectors = other.bitVectors; + } } public LongColumnStatsData deepCopy() { @@ -182,6 +192,7 @@ public void clear() { this.numNulls = 0; setNumDVsIsSet(false); this.numDVs = 0; + this.bitVectors = null; } public long getLowValue() { @@ -272,6 +283,29 @@ public void setNumDVsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); } + public String getBitVectors() { + return this.bitVectors; + } + + public void setBitVectors(String bitVectors) { + this.bitVectors = bitVectors; + } + + public void unsetBitVectors() { + this.bitVectors = null; + } + + /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */ + public boolean isSetBitVectors() { + return this.bitVectors != null; + } + + public void setBitVectorsIsSet(boolean value) { + if (!value) { + this.bitVectors = null; + } + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case LOW_VALUE: @@ -306,6 +340,14 @@ public void setFieldValue(_Fields field, Object value) { } break; + case BIT_VECTORS: + if (value == null) { + unsetBitVectors(); + } else { + setBitVectors((String)value); + } + break; + } } @@ -323,6 +365,9 @@ public Object getFieldValue(_Fields field) { case NUM_DVS: return getNumDVs(); + case BIT_VECTORS: + return getBitVectors(); + } throw new IllegalStateException(); } @@ -342,6 +387,8 @@ public boolean isSet(_Fields field) { return 
isSetNumNulls(); case NUM_DVS: return isSetNumDVs(); + case BIT_VECTORS: + return isSetBitVectors(); } throw new IllegalStateException(); } @@ -395,6 +442,15 @@ public boolean equals(LongColumnStatsData that) { return false; } + boolean this_present_bitVectors = true && this.isSetBitVectors(); + boolean that_present_bitVectors = true && that.isSetBitVectors(); + if (this_present_bitVectors || that_present_bitVectors) { + if (!(this_present_bitVectors && that_present_bitVectors)) + return false; + if (!this.bitVectors.equals(that.bitVectors)) + return false; + } + return true; } @@ -422,6 +478,11 @@ public int hashCode() { if (present_numDVs) list.add(numDVs); + boolean present_bitVectors = true && (isSetBitVectors()); + list.add(present_bitVectors); + if (present_bitVectors) + list.add(bitVectors); + return list.hashCode(); } @@ -473,6 +534,16 @@ public int compareTo(LongColumnStatsData other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBitVectors()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -512,6 +583,16 @@ public String toString() { sb.append("numDVs:"); sb.append(this.numDVs); first = false; + if (isSetBitVectors()) { + if (!first) sb.append(", "); + sb.append("bitVectors:"); + if (this.bitVectors == null) { + sb.append("null"); + } else { + sb.append(this.bitVectors); + } + first = false; + } sb.append(")"); return sb.toString(); } @@ -597,6 +678,14 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, LongColumnStatsData org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 5: // BIT_VECTORS + if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -626,6 +715,13 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, LongColumnStatsDat oprot.writeFieldBegin(NUM_DVS_FIELD_DESC); oprot.writeI64(struct.numDVs); oprot.writeFieldEnd(); + if (struct.bitVectors != null) { + if (struct.isSetBitVectors()) { + oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); + oprot.writeString(struct.bitVectors); + oprot.writeFieldEnd(); + } + } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -652,13 +748,19 @@ public void write(org.apache.thrift.protocol.TProtocol prot, LongColumnStatsData if (struct.isSetHighValue()) { optionals.set(1); } - oprot.writeBitSet(optionals, 2); + if (struct.isSetBitVectors()) { + optionals.set(2); + } + oprot.writeBitSet(optionals, 3); if (struct.isSetLowValue()) { oprot.writeI64(struct.lowValue); } if (struct.isSetHighValue()) { oprot.writeI64(struct.highValue); } + if (struct.isSetBitVectors()) { + oprot.writeString(struct.bitVectors); + } } @Override @@ -668,7 +770,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, LongColumnStatsData struct.setNumNullsIsSet(true); struct.numDVs = iprot.readI64(); struct.setNumDVsIsSet(true); - BitSet incoming = iprot.readBitSet(2); + BitSet incoming = iprot.readBitSet(3); if (incoming.get(0)) { struct.lowValue = iprot.readI64(); struct.setLowValueIsSet(true); @@ -677,6 +779,10 @@ public void read(org.apache.thrift.protocol.TProtocol 
prot, LongColumnStatsData struct.highValue = iprot.readI64(); struct.setHighValueIsSet(true); } + if (incoming.get(2)) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } } } diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java index bd8a922..c9afe87 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StringColumnStatsData.java @@ -42,6 +42,7 @@ private static final org.apache.thrift.protocol.TField AVG_COL_LEN_FIELD_DESC = new org.apache.thrift.protocol.TField("avgColLen", org.apache.thrift.protocol.TType.DOUBLE, (short)2); private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3); private static final org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4); + private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -53,13 +54,15 @@ private double avgColLen; // required private long numNulls; // required private long numDVs; // required + private String bitVectors; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { MAX_COL_LEN((short)1, "maxColLen"), AVG_COL_LEN((short)2, "avgColLen"), NUM_NULLS((short)3, "numNulls"), - NUM_DVS((short)4, "numDVs"); + NUM_DVS((short)4, "numDVs"), + BIT_VECTORS((short)5, "bitVectors"); private static final Map byName = new HashMap(); @@ -82,6 +85,8 @@ public static _Fields findByThriftId(int fieldId) { return NUM_NULLS; case 4: // NUM_DVS return NUM_DVS; + case 5: // BIT_VECTORS + return BIT_VECTORS; default: return null; } @@ -127,6 +132,7 @@ public String getFieldName() { private static final int __NUMNULLS_ISSET_ID = 2; private static final int __NUMDVS_ISSET_ID = 3; private byte __isset_bitfield = 0; + private static final _Fields optionals[] = {_Fields.BIT_VECTORS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -138,6 +144,8 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); tmpMap.put(_Fields.NUM_DVS, new org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StringColumnStatsData.class, metaDataMap); 
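Because the static initializer above registers bitVectors in metaDataMap as an OPTIONAL string field, metadata-driven code can discover it reflectively and keep treating it as absent on records written before this change. A small sketch of that introspection, assuming only libthrift's metadata classes (class name is a placeholder):

import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.thrift.TFieldRequirementType;
import org.apache.thrift.meta_data.FieldMetaData;

public class BitVectorsMetaDataSketch {
  public static void main(String[] args) {
    FieldMetaData meta =
        StringColumnStatsData.metaDataMap.get(StringColumnStatsData._Fields.BIT_VECTORS);

    // The new field (id 5) is registered with OPTIONAL requirement and STRING value metadata.
    assert meta.requirementType == TFieldRequirementType.OPTIONAL;
    System.out.println(meta.fieldName + " is optional: "
        + (meta.requirementType == TFieldRequirementType.OPTIONAL));
  }
}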
} @@ -171,6 +179,9 @@ public StringColumnStatsData(StringColumnStatsData other) { this.avgColLen = other.avgColLen; this.numNulls = other.numNulls; this.numDVs = other.numDVs; + if (other.isSetBitVectors()) { + this.bitVectors = other.bitVectors; + } } public StringColumnStatsData deepCopy() { @@ -187,6 +198,7 @@ public void clear() { this.numNulls = 0; setNumDVsIsSet(false); this.numDVs = 0; + this.bitVectors = null; } public long getMaxColLen() { @@ -277,6 +289,29 @@ public void setNumDVsIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); } + public String getBitVectors() { + return this.bitVectors; + } + + public void setBitVectors(String bitVectors) { + this.bitVectors = bitVectors; + } + + public void unsetBitVectors() { + this.bitVectors = null; + } + + /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */ + public boolean isSetBitVectors() { + return this.bitVectors != null; + } + + public void setBitVectorsIsSet(boolean value) { + if (!value) { + this.bitVectors = null; + } + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case MAX_COL_LEN: @@ -311,6 +346,14 @@ public void setFieldValue(_Fields field, Object value) { } break; + case BIT_VECTORS: + if (value == null) { + unsetBitVectors(); + } else { + setBitVectors((String)value); + } + break; + } } @@ -328,6 +371,9 @@ public Object getFieldValue(_Fields field) { case NUM_DVS: return getNumDVs(); + case BIT_VECTORS: + return getBitVectors(); + } throw new IllegalStateException(); } @@ -347,6 +393,8 @@ public boolean isSet(_Fields field) { return isSetNumNulls(); case NUM_DVS: return isSetNumDVs(); + case BIT_VECTORS: + return isSetBitVectors(); } throw new IllegalStateException(); } @@ -400,6 +448,15 @@ public boolean equals(StringColumnStatsData that) { return false; } + boolean this_present_bitVectors = true && this.isSetBitVectors(); + boolean that_present_bitVectors = true && that.isSetBitVectors(); + if (this_present_bitVectors || that_present_bitVectors) { + if (!(this_present_bitVectors && that_present_bitVectors)) + return false; + if (!this.bitVectors.equals(that.bitVectors)) + return false; + } + return true; } @@ -427,6 +484,11 @@ public int hashCode() { if (present_numDVs) list.add(numDVs); + boolean present_bitVectors = true && (isSetBitVectors()); + list.add(present_bitVectors); + if (present_bitVectors) + list.add(bitVectors); + return list.hashCode(); } @@ -478,6 +540,16 @@ public int compareTo(StringColumnStatsData other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBitVectors()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -513,6 +585,16 @@ public String toString() { sb.append("numDVs:"); sb.append(this.numDVs); first = false; + if (isSetBitVectors()) { + if (!first) sb.append(", "); + sb.append("bitVectors:"); + if (this.bitVectors == null) { + sb.append("null"); + } else { + sb.append(this.bitVectors); + } + first = false; + } sb.append(")"); return sb.toString(); } @@ -606,6 +688,14 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, StringColumnStatsDa org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 5: // BIT_VECTORS + if (schemeField.type == 
org.apache.thrift.protocol.TType.STRING) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -631,6 +721,13 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, StringColumnStatsD oprot.writeFieldBegin(NUM_DVS_FIELD_DESC); oprot.writeI64(struct.numDVs); oprot.writeFieldEnd(); + if (struct.bitVectors != null) { + if (struct.isSetBitVectors()) { + oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); + oprot.writeString(struct.bitVectors); + oprot.writeFieldEnd(); + } + } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -652,6 +749,14 @@ public void write(org.apache.thrift.protocol.TProtocol prot, StringColumnStatsDa oprot.writeDouble(struct.avgColLen); oprot.writeI64(struct.numNulls); oprot.writeI64(struct.numDVs); + BitSet optionals = new BitSet(); + if (struct.isSetBitVectors()) { + optionals.set(0); + } + oprot.writeBitSet(optionals, 1); + if (struct.isSetBitVectors()) { + oprot.writeString(struct.bitVectors); + } } @Override @@ -665,6 +770,11 @@ public void read(org.apache.thrift.protocol.TProtocol prot, StringColumnStatsDat struct.setNumNullsIsSet(true); struct.numDVs = iprot.readI64(); struct.setNumDVsIsSet(true); + BitSet incoming = iprot.readBitSet(1); + if (incoming.get(0)) { + struct.bitVectors = iprot.readString(); + struct.setBitVectorsIsSet(true); + } } } diff --git a/metastore/src/gen/thrift/gen-php/metastore/Types.php b/metastore/src/gen/thrift/gen-php/metastore/Types.php index 380e6d0..57d1daf 100644 --- a/metastore/src/gen/thrift/gen-php/metastore/Types.php +++ b/metastore/src/gen/thrift/gen-php/metastore/Types.php @@ -5624,6 +5624,10 @@ class BooleanColumnStatsData { * @var int */ public $numNulls = null; + /** + * @var string + */ + public $bitVectors = null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -5640,6 +5644,10 @@ class BooleanColumnStatsData { 'var' => 'numNulls', 'type' => TType::I64, ), + 4 => array( + 'var' => 'bitVectors', + 'type' => TType::STRING, + ), ); } if (is_array($vals)) { @@ -5652,6 +5660,9 @@ class BooleanColumnStatsData { if (isset($vals['numNulls'])) { $this->numNulls = $vals['numNulls']; } + if (isset($vals['bitVectors'])) { + $this->bitVectors = $vals['bitVectors']; + } } } @@ -5695,6 +5706,13 @@ class BooleanColumnStatsData { $xfer += $input->skip($ftype); } break; + case 4: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->bitVectors); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -5723,6 +5741,11 @@ class BooleanColumnStatsData { $xfer += $output->writeI64($this->numNulls); $xfer += $output->writeFieldEnd(); } + if ($this->bitVectors !== null) { + $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 4); + $xfer += $output->writeString($this->bitVectors); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; @@ -5749,6 +5772,10 @@ class DoubleColumnStatsData { * @var int */ public $numDVs = null; + /** + * @var string + */ + public $bitVectors = null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -5769,6 +5796,10 @@ class DoubleColumnStatsData { 'var' => 'numDVs', 'type' => TType::I64, ), + 5 => array( + 'var' => 'bitVectors', + 'type' => TType::STRING, + ), ); } if (is_array($vals)) { @@ -5784,6 +5815,9 @@ 
class DoubleColumnStatsData { if (isset($vals['numDVs'])) { $this->numDVs = $vals['numDVs']; } + if (isset($vals['bitVectors'])) { + $this->bitVectors = $vals['bitVectors']; + } } } @@ -5834,6 +5868,13 @@ class DoubleColumnStatsData { $xfer += $input->skip($ftype); } break; + case 5: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->bitVectors); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -5867,6 +5908,11 @@ class DoubleColumnStatsData { $xfer += $output->writeI64($this->numDVs); $xfer += $output->writeFieldEnd(); } + if ($this->bitVectors !== null) { + $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5); + $xfer += $output->writeString($this->bitVectors); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; @@ -5893,6 +5939,10 @@ class LongColumnStatsData { * @var int */ public $numDVs = null; + /** + * @var string + */ + public $bitVectors = null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -5913,6 +5963,10 @@ class LongColumnStatsData { 'var' => 'numDVs', 'type' => TType::I64, ), + 5 => array( + 'var' => 'bitVectors', + 'type' => TType::STRING, + ), ); } if (is_array($vals)) { @@ -5928,6 +5982,9 @@ class LongColumnStatsData { if (isset($vals['numDVs'])) { $this->numDVs = $vals['numDVs']; } + if (isset($vals['bitVectors'])) { + $this->bitVectors = $vals['bitVectors']; + } } } @@ -5978,6 +6035,13 @@ class LongColumnStatsData { $xfer += $input->skip($ftype); } break; + case 5: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->bitVectors); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -6011,6 +6075,11 @@ class LongColumnStatsData { $xfer += $output->writeI64($this->numDVs); $xfer += $output->writeFieldEnd(); } + if ($this->bitVectors !== null) { + $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5); + $xfer += $output->writeString($this->bitVectors); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; @@ -6037,6 +6106,10 @@ class StringColumnStatsData { * @var int */ public $numDVs = null; + /** + * @var string + */ + public $bitVectors = null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -6057,6 +6130,10 @@ class StringColumnStatsData { 'var' => 'numDVs', 'type' => TType::I64, ), + 5 => array( + 'var' => 'bitVectors', + 'type' => TType::STRING, + ), ); } if (is_array($vals)) { @@ -6072,6 +6149,9 @@ class StringColumnStatsData { if (isset($vals['numDVs'])) { $this->numDVs = $vals['numDVs']; } + if (isset($vals['bitVectors'])) { + $this->bitVectors = $vals['bitVectors']; + } } } @@ -6122,6 +6202,13 @@ class StringColumnStatsData { $xfer += $input->skip($ftype); } break; + case 5: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->bitVectors); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -6155,6 +6242,11 @@ class StringColumnStatsData { $xfer += $output->writeI64($this->numDVs); $xfer += $output->writeFieldEnd(); } + if ($this->bitVectors !== null) { + $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5); + $xfer += $output->writeString($this->bitVectors); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; @@ -6177,6 +6269,10 @@ 
class BinaryColumnStatsData { * @var int */ public $numNulls = null; + /** + * @var string + */ + public $bitVectors = null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -6193,6 +6289,10 @@ class BinaryColumnStatsData { 'var' => 'numNulls', 'type' => TType::I64, ), + 4 => array( + 'var' => 'bitVectors', + 'type' => TType::STRING, + ), ); } if (is_array($vals)) { @@ -6205,6 +6305,9 @@ class BinaryColumnStatsData { if (isset($vals['numNulls'])) { $this->numNulls = $vals['numNulls']; } + if (isset($vals['bitVectors'])) { + $this->bitVectors = $vals['bitVectors']; + } } } @@ -6248,6 +6351,13 @@ class BinaryColumnStatsData { $xfer += $input->skip($ftype); } break; + case 4: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->bitVectors); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -6276,6 +6386,11 @@ class BinaryColumnStatsData { $xfer += $output->writeI64($this->numNulls); $xfer += $output->writeFieldEnd(); } + if ($this->bitVectors !== null) { + $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 4); + $xfer += $output->writeString($this->bitVectors); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; @@ -6400,6 +6515,10 @@ class DecimalColumnStatsData { * @var int */ public $numDVs = null; + /** + * @var string + */ + public $bitVectors = null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -6422,6 +6541,10 @@ class DecimalColumnStatsData { 'var' => 'numDVs', 'type' => TType::I64, ), + 5 => array( + 'var' => 'bitVectors', + 'type' => TType::STRING, + ), ); } if (is_array($vals)) { @@ -6437,6 +6560,9 @@ class DecimalColumnStatsData { if (isset($vals['numDVs'])) { $this->numDVs = $vals['numDVs']; } + if (isset($vals['bitVectors'])) { + $this->bitVectors = $vals['bitVectors']; + } } } @@ -6489,6 +6615,13 @@ class DecimalColumnStatsData { $xfer += $input->skip($ftype); } break; + case 5: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->bitVectors); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -6528,6 +6661,11 @@ class DecimalColumnStatsData { $xfer += $output->writeI64($this->numDVs); $xfer += $output->writeFieldEnd(); } + if ($this->bitVectors !== null) { + $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5); + $xfer += $output->writeString($this->bitVectors); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; @@ -6629,6 +6767,10 @@ class DateColumnStatsData { * @var int */ public $numDVs = null; + /** + * @var string + */ + public $bitVectors = null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -6651,6 +6793,10 @@ class DateColumnStatsData { 'var' => 'numDVs', 'type' => TType::I64, ), + 5 => array( + 'var' => 'bitVectors', + 'type' => TType::STRING, + ), ); } if (is_array($vals)) { @@ -6666,6 +6812,9 @@ class DateColumnStatsData { if (isset($vals['numDVs'])) { $this->numDVs = $vals['numDVs']; } + if (isset($vals['bitVectors'])) { + $this->bitVectors = $vals['bitVectors']; + } } } @@ -6718,6 +6867,13 @@ class DateColumnStatsData { $xfer += $input->skip($ftype); } break; + case 5: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->bitVectors); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -6757,6 
+6913,11 @@ class DateColumnStatsData { $xfer += $output->writeI64($this->numDVs); $xfer += $output->writeFieldEnd(); } + if ($this->bitVectors !== null) { + $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5); + $xfer += $output->writeString($this->bitVectors); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; diff --git a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py index 409c247..77dd9a6 100644 --- a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -3749,6 +3749,7 @@ class BooleanColumnStatsData: - numTrues - numFalses - numNulls + - bitVectors """ thrift_spec = ( @@ -3756,12 +3757,14 @@ class BooleanColumnStatsData: (1, TType.I64, 'numTrues', None, None, ), # 1 (2, TType.I64, 'numFalses', None, None, ), # 2 (3, TType.I64, 'numNulls', None, None, ), # 3 + (4, TType.STRING, 'bitVectors', None, None, ), # 4 ) - def __init__(self, numTrues=None, numFalses=None, numNulls=None,): + def __init__(self, numTrues=None, numFalses=None, numNulls=None, bitVectors=None,): self.numTrues = numTrues self.numFalses = numFalses self.numNulls = numNulls + self.bitVectors = bitVectors def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -3787,6 +3790,11 @@ def read(self, iprot): self.numNulls = iprot.readI64() else: iprot.skip(ftype) + elif fid == 4: + if ftype == TType.STRING: + self.bitVectors = iprot.readString() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -3809,6 +3817,10 @@ def write(self, oprot): oprot.writeFieldBegin('numNulls', TType.I64, 3) oprot.writeI64(self.numNulls) oprot.writeFieldEnd() + if self.bitVectors is not None: + oprot.writeFieldBegin('bitVectors', TType.STRING, 4) + oprot.writeString(self.bitVectors) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -3827,6 +3839,7 @@ def __hash__(self): value = (value * 31) ^ hash(self.numTrues) value = (value * 31) ^ hash(self.numFalses) value = (value * 31) ^ hash(self.numNulls) + value = (value * 31) ^ hash(self.bitVectors) return value def __repr__(self): @@ -3847,6 +3860,7 @@ class DoubleColumnStatsData: - highValue - numNulls - numDVs + - bitVectors """ thrift_spec = ( @@ -3855,13 +3869,15 @@ class DoubleColumnStatsData: (2, TType.DOUBLE, 'highValue', None, None, ), # 2 (3, TType.I64, 'numNulls', None, None, ), # 3 (4, TType.I64, 'numDVs', None, None, ), # 4 + (5, TType.STRING, 'bitVectors', None, None, ), # 5 ) - def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,): + def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,): self.lowValue = lowValue self.highValue = highValue self.numNulls = numNulls self.numDVs = numDVs + self.bitVectors = bitVectors def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -3892,6 +3908,11 @@ def read(self, iprot): self.numDVs = iprot.readI64() else: iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.bitVectors = iprot.readString() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -3918,6 +3939,10 @@ def 
write(self, oprot): oprot.writeFieldBegin('numDVs', TType.I64, 4) oprot.writeI64(self.numDVs) oprot.writeFieldEnd() + if self.bitVectors is not None: + oprot.writeFieldBegin('bitVectors', TType.STRING, 5) + oprot.writeString(self.bitVectors) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -3935,6 +3960,7 @@ def __hash__(self): value = (value * 31) ^ hash(self.highValue) value = (value * 31) ^ hash(self.numNulls) value = (value * 31) ^ hash(self.numDVs) + value = (value * 31) ^ hash(self.bitVectors) return value def __repr__(self): @@ -3955,6 +3981,7 @@ class LongColumnStatsData: - highValue - numNulls - numDVs + - bitVectors """ thrift_spec = ( @@ -3963,13 +3990,15 @@ class LongColumnStatsData: (2, TType.I64, 'highValue', None, None, ), # 2 (3, TType.I64, 'numNulls', None, None, ), # 3 (4, TType.I64, 'numDVs', None, None, ), # 4 + (5, TType.STRING, 'bitVectors', None, None, ), # 5 ) - def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,): + def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,): self.lowValue = lowValue self.highValue = highValue self.numNulls = numNulls self.numDVs = numDVs + self.bitVectors = bitVectors def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -4000,6 +4029,11 @@ def read(self, iprot): self.numDVs = iprot.readI64() else: iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.bitVectors = iprot.readString() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -4026,6 +4060,10 @@ def write(self, oprot): oprot.writeFieldBegin('numDVs', TType.I64, 4) oprot.writeI64(self.numDVs) oprot.writeFieldEnd() + if self.bitVectors is not None: + oprot.writeFieldBegin('bitVectors', TType.STRING, 5) + oprot.writeString(self.bitVectors) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -4043,6 +4081,7 @@ def __hash__(self): value = (value * 31) ^ hash(self.highValue) value = (value * 31) ^ hash(self.numNulls) value = (value * 31) ^ hash(self.numDVs) + value = (value * 31) ^ hash(self.bitVectors) return value def __repr__(self): @@ -4063,6 +4102,7 @@ class StringColumnStatsData: - avgColLen - numNulls - numDVs + - bitVectors """ thrift_spec = ( @@ -4071,13 +4111,15 @@ class StringColumnStatsData: (2, TType.DOUBLE, 'avgColLen', None, None, ), # 2 (3, TType.I64, 'numNulls', None, None, ), # 3 (4, TType.I64, 'numDVs', None, None, ), # 4 + (5, TType.STRING, 'bitVectors', None, None, ), # 5 ) - def __init__(self, maxColLen=None, avgColLen=None, numNulls=None, numDVs=None,): + def __init__(self, maxColLen=None, avgColLen=None, numNulls=None, numDVs=None, bitVectors=None,): self.maxColLen = maxColLen self.avgColLen = avgColLen self.numNulls = numNulls self.numDVs = numDVs + self.bitVectors = bitVectors def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -4108,6 +4150,11 @@ def read(self, iprot): self.numDVs = iprot.readI64() else: iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.bitVectors = iprot.readString() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -4134,6 +4181,10 @@ def write(self, oprot): oprot.writeFieldBegin('numDVs', TType.I64, 4) oprot.writeI64(self.numDVs) 
oprot.writeFieldEnd() + if self.bitVectors is not None: + oprot.writeFieldBegin('bitVectors', TType.STRING, 5) + oprot.writeString(self.bitVectors) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -4155,6 +4206,7 @@ def __hash__(self): value = (value * 31) ^ hash(self.avgColLen) value = (value * 31) ^ hash(self.numNulls) value = (value * 31) ^ hash(self.numDVs) + value = (value * 31) ^ hash(self.bitVectors) return value def __repr__(self): @@ -4174,6 +4226,7 @@ class BinaryColumnStatsData: - maxColLen - avgColLen - numNulls + - bitVectors """ thrift_spec = ( @@ -4181,12 +4234,14 @@ class BinaryColumnStatsData: (1, TType.I64, 'maxColLen', None, None, ), # 1 (2, TType.DOUBLE, 'avgColLen', None, None, ), # 2 (3, TType.I64, 'numNulls', None, None, ), # 3 + (4, TType.STRING, 'bitVectors', None, None, ), # 4 ) - def __init__(self, maxColLen=None, avgColLen=None, numNulls=None,): + def __init__(self, maxColLen=None, avgColLen=None, numNulls=None, bitVectors=None,): self.maxColLen = maxColLen self.avgColLen = avgColLen self.numNulls = numNulls + self.bitVectors = bitVectors def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -4212,6 +4267,11 @@ def read(self, iprot): self.numNulls = iprot.readI64() else: iprot.skip(ftype) + elif fid == 4: + if ftype == TType.STRING: + self.bitVectors = iprot.readString() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -4234,6 +4294,10 @@ def write(self, oprot): oprot.writeFieldBegin('numNulls', TType.I64, 3) oprot.writeI64(self.numNulls) oprot.writeFieldEnd() + if self.bitVectors is not None: + oprot.writeFieldBegin('bitVectors', TType.STRING, 4) + oprot.writeString(self.bitVectors) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -4252,6 +4316,7 @@ def __hash__(self): value = (value * 31) ^ hash(self.maxColLen) value = (value * 31) ^ hash(self.avgColLen) value = (value * 31) ^ hash(self.numNulls) + value = (value * 31) ^ hash(self.bitVectors) return value def __repr__(self): @@ -4355,6 +4420,7 @@ class DecimalColumnStatsData: - highValue - numNulls - numDVs + - bitVectors """ thrift_spec = ( @@ -4363,13 +4429,15 @@ class DecimalColumnStatsData: (2, TType.STRUCT, 'highValue', (Decimal, Decimal.thrift_spec), None, ), # 2 (3, TType.I64, 'numNulls', None, None, ), # 3 (4, TType.I64, 'numDVs', None, None, ), # 4 + (5, TType.STRING, 'bitVectors', None, None, ), # 5 ) - def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,): + def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,): self.lowValue = lowValue self.highValue = highValue self.numNulls = numNulls self.numDVs = numDVs + self.bitVectors = bitVectors def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -4402,6 +4470,11 @@ def read(self, iprot): self.numDVs = iprot.readI64() else: iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.bitVectors = iprot.readString() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -4428,6 +4501,10 @@ def write(self, oprot): oprot.writeFieldBegin('numDVs', TType.I64, 4) oprot.writeI64(self.numDVs) oprot.writeFieldEnd() + if self.bitVectors is not None: + oprot.writeFieldBegin('bitVectors', 
TType.STRING, 5) + oprot.writeString(self.bitVectors) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -4445,6 +4522,7 @@ def __hash__(self): value = (value * 31) ^ hash(self.highValue) value = (value * 31) ^ hash(self.numNulls) value = (value * 31) ^ hash(self.numDVs) + value = (value * 31) ^ hash(self.bitVectors) return value def __repr__(self): @@ -4532,6 +4610,7 @@ class DateColumnStatsData: - highValue - numNulls - numDVs + - bitVectors """ thrift_spec = ( @@ -4540,13 +4619,15 @@ class DateColumnStatsData: (2, TType.STRUCT, 'highValue', (Date, Date.thrift_spec), None, ), # 2 (3, TType.I64, 'numNulls', None, None, ), # 3 (4, TType.I64, 'numDVs', None, None, ), # 4 + (5, TType.STRING, 'bitVectors', None, None, ), # 5 ) - def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None,): + def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, bitVectors=None,): self.lowValue = lowValue self.highValue = highValue self.numNulls = numNulls self.numDVs = numDVs + self.bitVectors = bitVectors def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -4579,6 +4660,11 @@ def read(self, iprot): self.numDVs = iprot.readI64() else: iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.bitVectors = iprot.readString() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -4605,6 +4691,10 @@ def write(self, oprot): oprot.writeFieldBegin('numDVs', TType.I64, 4) oprot.writeI64(self.numDVs) oprot.writeFieldEnd() + if self.bitVectors is not None: + oprot.writeFieldBegin('bitVectors', TType.STRING, 5) + oprot.writeString(self.bitVectors) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -4622,6 +4712,7 @@ def __hash__(self): value = (value * 31) ^ hash(self.highValue) value = (value * 31) ^ hash(self.numNulls) value = (value * 31) ^ hash(self.numDVs) + value = (value * 31) ^ hash(self.bitVectors) return value def __repr__(self): diff --git a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb index a473611..2cf433b 100644 --- a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -824,11 +824,13 @@ class BooleanColumnStatsData NUMTRUES = 1 NUMFALSES = 2 NUMNULLS = 3 + BITVECTORS = 4 FIELDS = { NUMTRUES => {:type => ::Thrift::Types::I64, :name => 'numTrues'}, NUMFALSES => {:type => ::Thrift::Types::I64, :name => 'numFalses'}, - NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'} + NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} } def struct_fields; FIELDS; end @@ -848,12 +850,14 @@ class DoubleColumnStatsData HIGHVALUE = 2 NUMNULLS = 3 NUMDVS = 4 + BITVECTORS = 5 FIELDS = { LOWVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'lowValue', :optional => true}, HIGHVALUE => {:type => ::Thrift::Types::DOUBLE, :name => 'highValue', :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, - NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'} + NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} } def struct_fields; FIELDS; end @@ -872,12 +876,14 @@ 
class LongColumnStatsData HIGHVALUE = 2 NUMNULLS = 3 NUMDVS = 4 + BITVECTORS = 5 FIELDS = { LOWVALUE => {:type => ::Thrift::Types::I64, :name => 'lowValue', :optional => true}, HIGHVALUE => {:type => ::Thrift::Types::I64, :name => 'highValue', :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, - NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'} + NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} } def struct_fields; FIELDS; end @@ -896,12 +902,14 @@ class StringColumnStatsData AVGCOLLEN = 2 NUMNULLS = 3 NUMDVS = 4 + BITVECTORS = 5 FIELDS = { MAXCOLLEN => {:type => ::Thrift::Types::I64, :name => 'maxColLen'}, AVGCOLLEN => {:type => ::Thrift::Types::DOUBLE, :name => 'avgColLen'}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, - NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'} + NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} } def struct_fields; FIELDS; end @@ -921,11 +929,13 @@ class BinaryColumnStatsData MAXCOLLEN = 1 AVGCOLLEN = 2 NUMNULLS = 3 + BITVECTORS = 4 FIELDS = { MAXCOLLEN => {:type => ::Thrift::Types::I64, :name => 'maxColLen'}, AVGCOLLEN => {:type => ::Thrift::Types::DOUBLE, :name => 'avgColLen'}, - NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'} + NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} } def struct_fields; FIELDS; end @@ -965,12 +975,14 @@ class DecimalColumnStatsData HIGHVALUE = 2 NUMNULLS = 3 NUMDVS = 4 + BITVECTORS = 5 FIELDS = { LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Decimal, :optional => true}, HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Decimal, :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, - NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'} + NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} } def struct_fields; FIELDS; end @@ -1006,12 +1018,14 @@ class DateColumnStatsData HIGHVALUE = 2 NUMNULLS = 3 NUMDVS = 4 + BITVECTORS = 5 FIELDS = { LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Date, :optional => true}, HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Date, :optional => true}, NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, - NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'} + NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :optional => true} } def struct_fields; FIELDS; end diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java new file mode 100644 index 0000000..da12923 --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java @@ -0,0 +1,359 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore; +import java.util.Random; + +import javolution.util.FastBitSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.io.Text; + +public class NumDistinctValueEstimator { + + static final Log LOG = LogFactory.getLog(NumDistinctValueEstimator.class.getName()); + + /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number. + * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1. + * If a,b,x didn't come from a finite field ax1 + b mod k and ax2 + b mod k will not be pair wise + * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1 + * thus introducing errors in the estimates. + */ + private static final int BIT_VECTOR_SIZE = 31; + private final int numBitVectors; + + // Refer to Flajolet-Martin'86 for the value of phi + private static final double PHI = 0.77351; + + private final int[] a; + private final int[] b; + private final FastBitSet[] bitVector; + + private final Random aValue; + private final Random bValue; + + /* Create a new distinctValueEstimator + */ + public NumDistinctValueEstimator(int numBitVectors) { + this.numBitVectors = numBitVectors; + bitVector = new FastBitSet[numBitVectors]; + for (int i=0; i< numBitVectors; i++) { + bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE); + } + + a = new int[numBitVectors]; + b = new int[numBitVectors]; + + /* Use a large prime number as a seed to the random number generator. + * Java's random number generator uses the Linear Congruential Generator to generate random + * numbers using the following recurrence relation, + * + * X(n+1) = (a X(n) + c ) mod m + * + * where X0 is the seed. Java implementation uses m = 2^48. This is problematic because 2^48 + * is not a prime number and hence the set of numbers from 0 to m don't form a finite field. + * If these numbers don't come from a finite field any give X(n) and X(n+1) may not be pair + * wise independent. + * + * However, empirically passing in prime numbers as seeds seems to work better than when passing + * composite numbers as seeds. Ideally Java's Random should pick m such that m is prime. + * + */ + aValue = new Random(99397); + bValue = new Random(9876413); + + for (int i = 0; i < numBitVectors; i++) { + int randVal; + /* a and b shouldn't be even; If a and b are even, then none of the values + * will set bit 0 thus introducing errors in the estimate. Both a and b can be even + * 25% of the times and as a result 25% of the bit vectors could be inaccurate. To avoid this + * always pick odd values for a and b. 
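Note (editor): purely as an illustration of the scheme these comments describe — hash each value with h_i(v) = (a_i*v + b_i) mod (2^31 - 1), record the position of the lowest set bit of the hash, and apply the Flajolet-Martin correction — here is a compact stand-alone sketch. It is not the patch's implementation: it uses java.util.BitSet instead of javolution's FastBitSet and a simplified way of keeping a and b odd.

import java.util.BitSet;
import java.util.Random;

public class FlajoletMartinSketch {
  private static final int BIT_VECTOR_SIZE = 31;
  private static final double PHI = 0.77351;

  private final int numVectors;
  private final int[] a;
  private final int[] b;
  private final BitSet[] vectors;

  public FlajoletMartinSketch(int numVectors) {
    this.numVectors = numVectors;
    a = new int[numVectors];
    b = new int[numVectors];
    vectors = new BitSet[numVectors];
    Random rand = new Random(99397);        // seed value mirroring the patch
    for (int i = 0; i < numVectors; i++) {
      vectors[i] = new BitSet(BIT_VECTOR_SIZE);
      a[i] = rand.nextInt() | 1;            // keep the multipliers odd
      b[i] = rand.nextInt() | 1;
    }
  }

  public void add(long v) {
    int mod = (1 << BIT_VECTOR_SIZE) - 1;   // 2^31 - 1, a prime
    for (int i = 0; i < numVectors; i++) {
      long hash = ((long) a[i] * v + b[i]) % mod;
      if (hash < 0) {
        hash += mod;                        // map into [0, 2^31 - 2]
      }
      // Position of the least significant set bit of the hash.
      int index = Long.numberOfTrailingZeros(hash);
      vectors[i].set(Math.min(index, BIT_VECTOR_SIZE - 1));
    }
  }

  public long estimate() {
    double sum = 0;
    for (int i = 0; i < numVectors; i++) {
      sum += vectors[i].nextClearBit(0);    // lowest position never observed
    }
    return (long) (Math.pow(2.0, sum / numVectors) / PHI);
  }

  public static void main(String[] args) {
    FlajoletMartinSketch sketch = new FlajoletMartinSketch(64);
    for (long v = 0; v < 10000; v++) {
      sketch.add(v);
    }
    System.out.println("estimated NDV: " + sketch.estimate());
  }
}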
+ */ + do { + randVal = aValue.nextInt(); + } while (randVal % 2 == 0); + + a[i] = randVal; + + do { + randVal = bValue.nextInt(); + } while (randVal % 2 == 0); + + b[i] = randVal; + + if (a[i] < 0) { + a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1); + } + + if (b[i] < 0) { + b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1); + } + } + } + + public NumDistinctValueEstimator(String s, int numBitVectors) { + this.numBitVectors = numBitVectors; + FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors); + bitVector = new FastBitSet[numBitVectors]; + for(int i=0; i = '0' && c <= '9') { + String t = new String(); + t = t + c; + c = s.charAt(i); + i = i + 1; + + while (c != ',' && c!= '}') { + t = t + c; + c = s.charAt(i); + i = i + 1; + } + + int bitIndex = Integer.parseInt(t); + assert(bitIndex >= 0); + assert(vectorIndex < numBitVectors); + b[vectorIndex].set(bitIndex); + if (c == '}') { + vectorIndex = vectorIndex + 1; + } + } + } + return b; + } + + private int generateHash(long v, int hashNum) { + int mod = (1<> 1; + } + + // Set bitvector[index] := 1 + bitVector[i].set(index); + } + } + + public void addToEstimatorPCSA(long v) { + int hash = generateHashForPCSA(v); + int rho = hash/numBitVectors; + int index; + + // Find the index of the least significant bit that is 1 + for (index=0; index> 1; + } + + // Set bitvector[index] := 1 + bitVector[hash%numBitVectors].set(index); + } + + public void addToEstimator(double d) { + int v = new Double(d).hashCode(); + addToEstimator(v); + } + + public void addToEstimatorPCSA(double d) { + int v = new Double(d).hashCode(); + addToEstimatorPCSA(v); + } + + public void addToEstimator(HiveDecimal decimal) { + int v = decimal.hashCode(); + addToEstimator(v); + } + + public void addToEstimatorPCSA(HiveDecimal decimal) { + int v = decimal.hashCode(); + addToEstimatorPCSA(v); + } + + public void mergeEstimators(NumDistinctValueEstimator o) { + // Bitwise OR the bitvector with the bitvector in the agg buffer + for (int i=0; i() { @Override public AggrStats load(StatsCacheKey key) throws Exception { + int numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); HBaseReadWrite hrw = HBaseReadWrite.getInstance(); AggrStats aggrStats = hrw.getAggregatedStats(key.hashed); if (aggrStats == null) { @@ -103,7 +106,7 @@ public AggrStats load(StatsCacheKey key) throws Exception { } if (aggregator == null) { aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator( - cso.getStatsData().getSetField()); + cso.getStatsData().getSetField(), numBitVectors); } aggregator.aggregate(statsObj, cso); } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java index bbd2c7b..40340dd 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BinaryColumnStatsAggregator.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -public class BinaryColumnStatsAggregator implements ColumnStatsAggregator{ +public class BinaryColumnStatsAggregator extends ColumnStatsAggregator{ @Override public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java 
b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java index 9047f68..735d965 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/BooleanColumnStatsAggregator.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -public class BooleanColumnStatsAggregator implements ColumnStatsAggregator { +public class BooleanColumnStatsAggregator extends ColumnStatsAggregator { @Override public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java index 217b654..694e53b 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregator.java @@ -19,8 +19,10 @@ package org.apache.hadoop.hive.metastore.hbase.stats; +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -public interface ColumnStatsAggregator { - public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats); +public abstract class ColumnStatsAggregator { + NumDistinctValueEstimator ndvEstimator = null; + public abstract void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats); } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java index a8dbc1f..8eb127b 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/ColumnStatsAggregatorFactory.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.metastore.hbase.stats; +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; @@ -34,23 +35,34 @@ private ColumnStatsAggregatorFactory() { } - public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type) { + public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, int numBitVectors) { + ColumnStatsAggregator agg; switch (type) { case BOOLEAN_STATS: - return new BooleanColumnStatsAggregator(); + agg = new BooleanColumnStatsAggregator(); + break; case LONG_STATS: - return new LongColumnStatsAggregator(); + agg = new LongColumnStatsAggregator(); + break; case DOUBLE_STATS: - return new DoubleColumnStatsAggregator(); + agg = new DoubleColumnStatsAggregator(); + break; case STRING_STATS: - return new StringColumnStatsAggregator(); + agg = new StringColumnStatsAggregator(); + break; case BINARY_STATS: - return new BinaryColumnStatsAggregator(); + agg = new BinaryColumnStatsAggregator(); + break; case DECIMAL_STATS: - return new DecimalColumnStatsAggregator(); + agg = new DecimalColumnStatsAggregator(); + break; default: throw new RuntimeException("Woh, bad. 
Unknown stats type " + type.toString()); } + if (numBitVectors > 0) { + agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + } + return agg; } public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java index ec25b31..41084f9 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DecimalColumnStatsAggregator.java @@ -19,11 +19,12 @@ package org.apache.hadoop.hive.metastore.hbase.stats; +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; -public class DecimalColumnStatsAggregator implements ColumnStatsAggregator { +public class DecimalColumnStatsAggregator extends ColumnStatsAggregator { @Override public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { @@ -38,6 +39,12 @@ public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj .getHighValue() : newData.getHighValue(); aggregateData.setHighValue(highValue); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + if (ndvEstimator == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java index 71af0ac..5914bad 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java @@ -19,10 +19,11 @@ package org.apache.hadoop.hive.metastore.hbase.stats; +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; -public class DoubleColumnStatsAggregator implements ColumnStatsAggregator { +public class DoubleColumnStatsAggregator extends ColumnStatsAggregator { @Override public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { @@ -31,6 +32,12 @@ public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + if (ndvEstimator == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + ndvEstimator.mergeEstimators(new 
NumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java index 15b8cf7..0dc743f 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java @@ -19,10 +19,11 @@ package org.apache.hadoop.hive.metastore.hbase.stats; +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -public class LongColumnStatsAggregator implements ColumnStatsAggregator { +public class LongColumnStatsAggregator extends ColumnStatsAggregator { @Override public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { @@ -31,6 +32,12 @@ public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + if (ndvEstimator == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java index fe1a04c..a2a1f75 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java @@ -19,10 +19,11 @@ package org.apache.hadoop.hive.metastore.hbase.stats; +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; -public class StringColumnStatsAggregator implements ColumnStatsAggregator { +public class StringColumnStatsAggregator extends ColumnStatsAggregator { @Override public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { @@ -31,6 +32,12 @@ public void aggregate(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + if (ndvEstimator == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), + 
ndvEstimator.getnumBitVectors())); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + } } } diff --git a/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto b/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto index 0d0ef89..466fdf9 100644 --- a/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto +++ b/metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto @@ -93,6 +93,7 @@ message ColumnStats { optional StringStats binary_stats = 9; optional DecimalStats decimal_stats = 10; optional string column_name = 11; + optional string bit_vectors = 12; } message Database { diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java new file mode 100644 index 0000000..c377d1b --- /dev/null +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java @@ -0,0 +1,627 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.FunctionType; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.ResourceType; +import org.apache.hadoop.hive.metastore.api.ResourceUri; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.SkewedInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Table; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.IOException; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * + */ +public class TestHBaseStoreBitVector { + private static final Logger LOG = LoggerFactory.getLogger(TestHBaseStoreBitVector.class.getName()); + static Map emptyParameters = new HashMap(); + // Table with NUM_PART_KEYS partitioning keys and NUM_PARTITIONS values per key + static final int NUM_PART_KEYS = 1; + static final int NUM_PARTITIONS = 5; + static final String DB = "db"; + static final String TBL = "tbl"; + static final String COL = "col"; + static final String PART_KEY_PREFIX = "part"; + static final String PART_VAL_PREFIX = "val"; + static final String PART_KV_SEPARATOR = "="; + static final List PART_KEYS = new ArrayList(); + static final List PART_VALS = new ArrayList(); + // Initialize mock partitions + static { + for (int i = 1; i <= NUM_PART_KEYS; i++) { + PART_KEYS.add(PART_KEY_PREFIX + i); + } + for (int i = 1; i <= NUM_PARTITIONS; i++) { + PART_VALS.add(PART_VAL_PREFIX + i); + } + } + static final long DEFAULT_TIME = System.currentTimeMillis(); + static final String PART_KEY = "part"; + static final String LONG_COL = "longCol"; + static final String LONG_TYPE = "long"; + static final String INT_TYPE = "int"; + static final String INT_VAL = 
"1234"; + static final String DOUBLE_COL = "doubleCol"; + static final String DOUBLE_TYPE = "double"; + static final String DOUBLE_VAL = "3.1415"; + static final String STRING_COL = "stringCol"; + static final String STRING_TYPE = "string"; + static final String STRING_VAL = "stringval"; + static final String DECIMAL_COL = "decimalCol"; + static final String DECIMAL_TYPE = "decimal(5,3)"; + static final String DECIMAL_VAL = "12.123"; + static List longColStatsObjs = new ArrayList( + NUM_PARTITIONS); + static List doubleColStatsObjs = new ArrayList( + NUM_PARTITIONS); + static List stringColStatsObjs = new ArrayList( + NUM_PARTITIONS); + static List decimalColStatsObjs = new ArrayList( + NUM_PARTITIONS); + + @Rule public ExpectedException thrown = ExpectedException.none(); + @Mock HTableInterface htable; + SortedMap rows = new TreeMap<>(); + HBaseStore store; + + + @BeforeClass + public static void beforeTest() { + // All data intitializations + populateMockStats(); + } + + private static void populateMockStats() { + ColumnStatisticsObj statsObj; + // Add NUM_PARTITIONS ColumnStatisticsObj of each type + // For aggregate stats test, we'll treat each ColumnStatisticsObj as stats for 1 partition + // For the rest, we'll just pick the 1st ColumnStatisticsObj from this list and use it + for (int i = 0; i < NUM_PARTITIONS; i++) { + statsObj = mockLongStats(i); + longColStatsObjs.add(statsObj); + statsObj = mockDoubleStats(i); + doubleColStatsObjs.add(statsObj); + statsObj = mockStringStats(i); + stringColStatsObjs.add(statsObj); + statsObj = mockDecimalStats(i); + decimalColStatsObjs.add(statsObj); + } + } + + private static ColumnStatisticsObj mockLongStats(int i) { + long high = 120938479124L + 100*i; + long low = -12341243213412124L - 50*i; + long nulls = 23 + i; + long dVs = 213L + 10*i; + String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{1, 2, 3, 4, 5, 6, 7, 8}"; + ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(); + colStatsObj.setColName(LONG_COL); + colStatsObj.setColType(LONG_TYPE); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData longData = new LongColumnStatsData(); + longData.setHighValue(high); + longData.setLowValue(low); + longData.setNumNulls(nulls); + longData.setNumDVs(dVs); + longData.setBitVectors(bitVectors); + data.setLongStats(longData); + colStatsObj.setStatsData(data); + return colStatsObj; + } + + private static ColumnStatisticsObj mockDoubleStats(int i) { + double high = 123423.23423 + 100*i; + double low = 0.00001234233 - 50*i; + long nulls = 92 + i; + long dVs = 1234123421L + 10*i; + String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 2, 3, 4, 5, 6, 7, 8}"; + ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(); + colStatsObj.setColName(DOUBLE_COL); + colStatsObj.setColType(DOUBLE_TYPE); + ColumnStatisticsData data = new ColumnStatisticsData(); + DoubleColumnStatsData doubleData = new DoubleColumnStatsData(); + doubleData.setHighValue(high); + doubleData.setLowValue(low); + doubleData.setNumNulls(nulls); + doubleData.setNumDVs(dVs); + doubleData.setBitVectors(bitVectors); + data.setDoubleStats(doubleData); + colStatsObj.setStatsData(data); + return colStatsObj; + } + + private static ColumnStatisticsObj mockStringStats(int i) { + long maxLen = 1234 + 10*i; + double avgLen = 32.3 + i; + long nulls = 987 + 10*i; + long dVs = 906 + i; + String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 3, 4, 5, 6, 7, 8}"; + ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(); + colStatsObj.setColName(STRING_COL); + 
colStatsObj.setColType(STRING_TYPE); + ColumnStatisticsData data = new ColumnStatisticsData(); + StringColumnStatsData stringData = new StringColumnStatsData(); + stringData.setMaxColLen(maxLen); + stringData.setAvgColLen(avgLen); + stringData.setNumNulls(nulls); + stringData.setNumDVs(dVs); + stringData.setBitVectors(bitVectors); + data.setStringStats(stringData); + colStatsObj.setStatsData(data); + return colStatsObj; + } + + private static ColumnStatisticsObj mockDecimalStats(int i) { + Decimal high = new Decimal(); + high.setScale((short)3); + String strHigh = String.valueOf(3876 + 100*i); + high.setUnscaled(strHigh.getBytes()); + Decimal low = new Decimal(); + low.setScale((short)3); + String strLow = String.valueOf(38 + i); + low.setUnscaled(strLow.getBytes()); + long nulls = 13 + i; + long dVs = 923947293L + 100*i; + String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 2, 4, 5, 6, 7, 8}"; + ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(); + colStatsObj.setColName(DECIMAL_COL); + colStatsObj.setColType(DECIMAL_TYPE); + ColumnStatisticsData data = new ColumnStatisticsData(); + DecimalColumnStatsData decimalData = new DecimalColumnStatsData(); + decimalData.setHighValue(high); + decimalData.setLowValue(low); + decimalData.setNumNulls(nulls); + decimalData.setNumDVs(dVs); + decimalData.setBitVectors(bitVectors); + data.setDecimalStats(decimalData); + colStatsObj.setStatsData(data); + return colStatsObj; + } + + @AfterClass + public static void afterTest() { + } + + + @Before + public void init() throws IOException { + MockitoAnnotations.initMocks(this); + HiveConf conf = new HiveConf(); + conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true); + store = MockUtils.init(conf, htable, rows); + } + + @Test + public void longTableStatistics() throws Exception { + // Add a long table stats for LONG_COL to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. 
+ ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for table level stats + ColumnStatisticsDesc desc = getMockTblColStatsDesc(); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = longColStatsObjs.get(0); + LongColumnStatsData longData = obj.getStatsData().getLongStats(); + // Add to DB + stats.addToStatsObj(obj); + store.updateTableColumnStatistics(stats); + // Get from DB + ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(LONG_COL)); + // Compare ColumnStatisticsDesc + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName()); + Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField()); + // Compare LongColumnStatsData + LongColumnStatsData longDataFromDB = dataFromDB.getLongStats(); + Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue()); + Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue()); + Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls()); + Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs()); + Assert.assertEquals(longData.getBitVectors(), longDataFromDB.getBitVectors()); + } + + @Test + public void doubleTableStatistics() throws Exception { + // Add a double table stats for DOUBLE_COL to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. 
+ ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for table level stats + ColumnStatisticsDesc desc = getMockTblColStatsDesc(); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = doubleColStatsObjs.get(0); + DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats(); + // Add to DB + stats.addToStatsObj(obj); + store.updateTableColumnStatistics(stats); + // Get from DB + ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DOUBLE_COL)); + // Compare ColumnStatisticsDesc + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName()); + Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, dataFromDB.getSetField()); + // Compare DoubleColumnStatsData + DoubleColumnStatsData doubleDataFromDB = dataFromDB.getDoubleStats(); + Assert.assertEquals(doubleData.getHighValue(), doubleDataFromDB.getHighValue(), 0.01); + Assert.assertEquals(doubleData.getLowValue(), doubleDataFromDB.getLowValue(), 0.01); + Assert.assertEquals(doubleData.getNumNulls(), doubleDataFromDB.getNumNulls()); + Assert.assertEquals(doubleData.getNumDVs(), doubleDataFromDB.getNumDVs()); + Assert.assertEquals(doubleData.getBitVectors(), doubleDataFromDB.getBitVectors()); + } + + @Test + public void stringTableStatistics() throws Exception { + // Add a string table stats for STRING_COL to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. 
+ ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for table level stats + ColumnStatisticsDesc desc = getMockTblColStatsDesc(); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = stringColStatsObjs.get(0); + StringColumnStatsData stringData = obj.getStatsData().getStringStats(); + // Add to DB + stats.addToStatsObj(obj); + store.updateTableColumnStatistics(stats); + // Get from DB + ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(STRING_COL)); + // Compare ColumnStatisticsDesc + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName()); + Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField()); + // Compare StringColumnStatsData + StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats(); + Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen()); + Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01); + Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls()); + Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs()); + Assert.assertEquals(stringData.getBitVectors(), stringDataFromDB.getBitVectors()); + } + + @Test + public void decimalTableStatistics() throws Exception { + // Add a decimal table stats for DECIMAL_COL to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. 
+ ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for table level stats + ColumnStatisticsDesc desc = getMockTblColStatsDesc(); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = decimalColStatsObjs.get(0); + DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats(); + // Add to DB + stats.addToStatsObj(obj); + store.updateTableColumnStatistics(stats); + // Get from DB + ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DECIMAL_COL)); + // Compare ColumnStatisticsDesc + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName()); + Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField()); + // Compare DecimalColumnStatsData + DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats(); + Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue()); + Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue()); + Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls()); + Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs()); + Assert.assertEquals(decimalData.getBitVectors(), decimalDataFromDB.getBitVectors()); + } + + @Test + public void longPartitionStatistics() throws Exception { + createMockTableAndPartition(INT_TYPE, INT_VAL); + // Add partition stats for: LONG_COL and partition: {PART_KEY, INT_VAL} to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. 
+ ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for partition level stats + ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, INT_VAL); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = longColStatsObjs.get(0); + LongColumnStatsData longData = obj.getStatsData().getLongStats(); + // Add to DB + stats.addToStatsObj(obj); + List<String> parVals = new ArrayList<String>(); + parVals.add(INT_VAL); + store.updatePartitionColumnStatistics(stats, parVals); + // Get from DB + List<String> partNames = new ArrayList<String>(); + partNames.add(desc.getPartName()); + List<String> colNames = new ArrayList<String>(); + colNames.add(obj.getColName()); + List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames); + // Compare ColumnStatisticsDesc + Assert.assertEquals(1, statsFromDB.size()); + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName()); + Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField()); + // Compare LongColumnStatsData + LongColumnStatsData longDataFromDB = dataFromDB.getLongStats(); + Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue()); + Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue()); + Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls()); + Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs()); + Assert.assertEquals(longData.getBitVectors(), longDataFromDB.getBitVectors()); + } + + @Test + public void doublePartitionStatistics() throws Exception { + createMockTableAndPartition(DOUBLE_TYPE, DOUBLE_VAL); + // Add partition stats for: DOUBLE_COL and partition: {PART_KEY, DOUBLE_VAL} to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it.
+ ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for partition level stats + ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DOUBLE_VAL); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = doubleColStatsObjs.get(0); + DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats(); + // Add to DB + stats.addToStatsObj(obj); + List<String> parVals = new ArrayList<String>(); + parVals.add(DOUBLE_VAL); + store.updatePartitionColumnStatistics(stats, parVals); + // Get from DB + List<String> partNames = new ArrayList<String>(); + partNames.add(desc.getPartName()); + List<String> colNames = new ArrayList<String>(); + colNames.add(obj.getColName()); + List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames); + // Compare ColumnStatisticsDesc + Assert.assertEquals(1, statsFromDB.size()); + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName()); + Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, dataFromDB.getSetField()); + // Compare DoubleColumnStatsData + DoubleColumnStatsData doubleDataFromDB = dataFromDB.getDoubleStats(); + Assert.assertEquals(doubleData.getHighValue(), doubleDataFromDB.getHighValue(), 0.01); + Assert.assertEquals(doubleData.getLowValue(), doubleDataFromDB.getLowValue(), 0.01); + Assert.assertEquals(doubleData.getNumNulls(), doubleDataFromDB.getNumNulls()); + Assert.assertEquals(doubleData.getNumDVs(), doubleDataFromDB.getNumDVs()); + Assert.assertEquals(doubleData.getBitVectors(), doubleDataFromDB.getBitVectors()); + } + + @Test + public void stringPartitionStatistics() throws Exception { + createMockTableAndPartition(STRING_TYPE, STRING_VAL); + // Add partition stats for: STRING_COL and partition: {PART_KEY, STRING_VAL} to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it.
+ ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for partition level stats + ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, STRING_VAL); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = stringColStatsObjs.get(0); + StringColumnStatsData stringData = obj.getStatsData().getStringStats(); + // Add to DB + stats.addToStatsObj(obj); + List<String> parVals = new ArrayList<String>(); + parVals.add(STRING_VAL); + store.updatePartitionColumnStatistics(stats, parVals); + // Get from DB + List<String> partNames = new ArrayList<String>(); + partNames.add(desc.getPartName()); + List<String> colNames = new ArrayList<String>(); + colNames.add(obj.getColName()); + List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames); + // Compare ColumnStatisticsDesc + Assert.assertEquals(1, statsFromDB.size()); + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName()); + Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField()); + // Compare StringColumnStatsData + StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats(); + Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen()); + Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01); + Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls()); + Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs()); + Assert.assertEquals(stringData.getBitVectors(), stringDataFromDB.getBitVectors()); + } + + @Test + public void decimalPartitionStatistics() throws Exception { + createMockTableAndPartition(DECIMAL_TYPE, DECIMAL_VAL); + // Add partition stats for: DECIMAL_COL and partition: {PART_KEY, DECIMAL_VAL} to DB + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it.
+ ColumnStatistics stats = new ColumnStatistics(); + // Get a default ColumnStatisticsDesc for partition level stats + ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DECIMAL_VAL); + stats.setStatsDesc(desc); + // Get one of the pre-created ColumnStatisticsObj + ColumnStatisticsObj obj = decimalColStatsObjs.get(0); + DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats(); + // Add to DB + stats.addToStatsObj(obj); + List<String> parVals = new ArrayList<String>(); + parVals.add(DECIMAL_VAL); + store.updatePartitionColumnStatistics(stats, parVals); + // Get from DB + List<String> partNames = new ArrayList<String>(); + partNames.add(desc.getPartName()); + List<String> colNames = new ArrayList<String>(); + colNames.add(obj.getColName()); + List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames); + // Compare ColumnStatisticsDesc + Assert.assertEquals(1, statsFromDB.size()); + Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName()); + Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName()); + Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel()); + // Compare ColumnStatisticsObj + Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize()); + ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0); + ColumnStatisticsData dataFromDB = objFromDB.getStatsData(); + // Compare ColumnStatisticsData + Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField()); + // Compare DecimalColumnStatsData + DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats(); + Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue()); + Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue()); + Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls()); + Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs()); + Assert.assertEquals(decimalData.getBitVectors(), decimalDataFromDB.getBitVectors()); + } + + private Table createMockTableAndPartition(String partType, String partVal) throws Exception { + List<FieldSchema> cols = new ArrayList<FieldSchema>(); + cols.add(new FieldSchema("col1", partType, "")); + List<String> vals = new ArrayList<String>(); + vals.add(partVal); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + Map<String, String> params = new HashMap<String, String>(); + params.put("key", "value"); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17, + serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params); + int currentTime = (int)(System.currentTimeMillis() / 1000); + Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols, + emptyParameters, null, null, null); + store.createTable(table); + Partition part = new Partition(vals, DB, TBL, currentTime, currentTime, sd, + emptyParameters); + store.addPartition(part); + return table; + } + /** + * Returns a dummy table level ColumnStatisticsDesc with default values + */ + private ColumnStatisticsDesc getMockTblColStatsDesc() { + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(DEFAULT_TIME); + desc.setDbName(DB); + desc.setTableName(TBL); + desc.setIsTblLevel(true); + return desc; + } + + /** + * Returns a dummy partition level ColumnStatisticsDesc + */ + private ColumnStatisticsDesc getMockPartColStatsDesc(String partKey, String partVal) { + ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
+ desc.setLastAnalyzed(DEFAULT_TIME); + desc.setDbName(DB); + desc.setTableName(TBL); + // part1=val1 + desc.setPartName(partKey + PART_KV_SEPARATOR + partVal); + desc.setIsTblLevel(false); + return desc; + } + +} diff --git a/ql/pom.xml b/ql/pom.xml index 5075185..2d09dfc 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -397,11 +397,6 @@ ${guava.version} - com.google.protobuf - protobuf-java - ${protobuf.version} - - com.googlecode.javaewah JavaEWAH ${javaewah.version} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java index f6fbe74..ec92ed0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java @@ -119,6 +119,10 @@ private void unpackDoubleStats(ObjectInspector oi, Object o, String fName, } else if (fName.equals("min")) { double d = ((DoubleObjectInspector) oi).get(o); statsObj.getStatsData().getDoubleStats().setLowValue(d); + } else if (fName.equals("ndvbitvector")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDoubleStats().setBitVectors(v); + } } @@ -136,6 +140,10 @@ private void unpackDecimalStats(ObjectInspector oi, Object o, String fName, } else if (fName.equals("min")) { HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d)); + } else if (fName.equals("ndvbitvector")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setBitVectors(v); + } } @@ -157,6 +165,10 @@ private void unpackLongStats(ObjectInspector oi, Object o, String fName, } else if (fName.equals("min")) { long v = ((LongObjectInspector) oi).get(o); statsObj.getStatsData().getLongStats().setLowValue(v); + } else if (fName.equals("ndvbitvector")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getLongStats().setBitVectors(v); + } } @@ -174,6 +186,10 @@ private void unpackStringStats(ObjectInspector oi, Object o, String fName, } else if (fName.equals("maxlength")) { long v = ((LongObjectInspector) oi).get(o); statsObj.getStatsData().getStringStats().setMaxColLen(v); + } else if (fName.equals("ndvbitvector")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getStringStats().setBitVectors(v); + } } @@ -205,6 +221,10 @@ private void unpackDateStats(ObjectInspector oi, Object o, String fName, } else if (fName.equals("min")) { DateWritable v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays())); + } else if (fName.equals("ndvbitvector")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDateStats().setBitVectors(v); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 1f30cbd..bb1bbad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -25,6 +25,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.conf.HiveVariableSource; @@ -201,60 +203,6 @@ private String getColTypeOf (String partKey) throws SemanticException{ throw new SemanticException ("Unknown partition key : " + partKey); } - private int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException { - int numBitVectors; - float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR); - - if (percentageError < 0.0) { - throw new SemanticException("hive.stats.ndv.error can't be negative"); - } else if (percentageError <= 2.4) { - numBitVectors = 1024; - LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%"); - LOG.info("Choosing 1024 bit vectors.."); - } else if (percentageError <= 3.4 ) { - numBitVectors = 1024; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 1024 bit vectors.."); - } else if (percentageError <= 4.8) { - numBitVectors = 512; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 512 bit vectors.."); - } else if (percentageError <= 6.8) { - numBitVectors = 256; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 256 bit vectors.."); - } else if (percentageError <= 9.7) { - numBitVectors = 128; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 128 bit vectors.."); - } else if (percentageError <= 13.8) { - numBitVectors = 64; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 64 bit vectors.."); - } else if (percentageError <= 19.6) { - numBitVectors = 32; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 32 bit vectors.."); - } else if (percentageError <= 28.2) { - numBitVectors = 16; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 16 bit vectors.."); - } else if (percentageError <= 40.9) { - numBitVectors = 8; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 8 bit vectors.."); - } else if (percentageError <= 61.0) { - numBitVectors = 4; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 4 bit vectors.."); - } else { - numBitVectors = 2; - LOG.info("Error requested is " + percentageError + "%"); - LOG.info("Choosing 2 bit vectors.."); - } - return numBitVectors; - } - private List getColumnTypes(List colNames) throws SemanticException{ List colTypes = new LinkedList(); @@ -396,7 +344,12 @@ public void analyze(ASTNode ast, Context origCtx) throws SemanticException { isTableLevel = true; } colType = getColumnTypes(colNames); - int numBitVectors = getNumBitVectorsForNDVEstimation(conf); + int numBitVectors; + try { + numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); + } catch (Exception e) { + throw new SemanticException(e.getMessage()); + } rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats); rewrittenTree = genRewrittenTree(rewrittenQuery); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index b4cf58f..ea506fc 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -1558,4 +1559,58 @@ public static long safeMult(long a, long b) { return Long.MAX_VALUE; } } + + public static int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException { + int numBitVectors; + float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR); + + if (percentageError < 0.0) { + throw new SemanticException("hive.stats.ndv.error can't be negative"); + } else if (percentageError <= 2.4) { + numBitVectors = 1024; + LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%"); + LOG.info("Choosing 1024 bit vectors.."); + } else if (percentageError <= 3.4 ) { + numBitVectors = 1024; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 1024 bit vectors.."); + } else if (percentageError <= 4.8) { + numBitVectors = 512; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 512 bit vectors.."); + } else if (percentageError <= 6.8) { + numBitVectors = 256; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 256 bit vectors.."); + } else if (percentageError <= 9.7) { + numBitVectors = 128; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 128 bit vectors.."); + } else if (percentageError <= 13.8) { + numBitVectors = 64; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 64 bit vectors.."); + } else if (percentageError <= 19.6) { + numBitVectors = 32; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 32 bit vectors.."); + } else if (percentageError <= 28.2) { + numBitVectors = 16; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 16 bit vectors.."); + } else if (percentageError <= 40.9) { + numBitVectors = 8; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 8 bit vectors.."); + } else if (percentageError <= 61.0) { + numBitVectors = 4; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 4 bit vectors.."); + } else { + numBitVectors = 2; + LOG.info("Error requested is " + percentageError + "%"); + LOG.info("Choosing 2 bit vectors.."); + } + return numBitVectors; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 0e96f89..ab47617 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -401,6 +401,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc foi.add(getValueObjectInspector()); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); List fname = new 
ArrayList(); fname.add("columnType"); @@ -408,11 +409,13 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc fname.add("max"); fname.add("countnulls"); fname.add("numdistinctvalues"); + fname.add("ndvbitvector"); - result = new Object[5]; + result = new Object[6]; result[0] = new Text(); result[3] = new LongWritable(0); result[4] = new LongWritable(0); + result[5] = new Text(); return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); @@ -448,6 +451,7 @@ protected Object serialize(Object[] result) { serializeCommon(result); long dv = numDV != null ? numDV.estimateNumDistinctValues() : 0; ((LongWritable) result[4]).set(dv); + ((Text) result[5]).set(numDV != null ? numDV.serialize() : ""); return result; } @@ -795,6 +799,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); List fname = new ArrayList(); fname.add("columntype"); @@ -802,13 +807,15 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc fname.add("avglength"); fname.add("countnulls"); fname.add("numdistinctvalues"); + fname.add("ndvbitvector"); - result = new Object[5]; + result = new Object[6]; result[0] = new Text(); result[1] = new LongWritable(0); result[2] = new DoubleWritable(0); result[3] = new LongWritable(0); result[4] = new LongWritable(0); + result[5] = new Text(); return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); @@ -1003,6 +1010,7 @@ public Object terminate(AggregationBuffer agg) throws HiveException { ((DoubleWritable) result[2]).set(avgLength); ((LongWritable) result[3]).set(myagg.countNulls); ((LongWritable) result[4]).set(numDV); + ((Text) result[5]).set(myagg.numDV != null ? myagg.numDV.serialize() : ""); return result; }
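For reference, the pieces touched in this part of the patch compose as follows: HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf) maps the requested hive.stats.ndv.error percentage to a bit-vector count for the NDV estimator, the compute_stats UDAF now emits the serialized estimator as the extra "ndvbitvector" struct field, ColumnStatsTask unpacks that string into the bitVectors member of the per-type column statistics, and the tests above verify it round-trips through the store. The sketch below is only a minimal illustration of the Thrift-side calls, modeled on the test fixtures; the class name, column name, type string, numeric values, and bit-vector literal are placeholders rather than values defined by the patch.

import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

public class NdvBitVectorSketch {
  public static ColumnStatisticsObj buildLongColStats(HiveConf conf) throws Exception {
    // Translate the configured hive.stats.ndv.error into a number of bit vectors,
    // using the helper this patch adds to HiveStatsUtils.
    int numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
    System.out.println("Requested error maps to " + numBitVectors + " bit vectors");

    // Populate the Thrift struct, including the new optional bitVectors field.
    LongColumnStatsData longData = new LongColumnStatsData();
    longData.setLowValue(-20);
    longData.setHighValue(1234567890L);
    longData.setNumNulls(30);
    longData.setNumDVs(12342);
    // Placeholder serialized estimator, shaped like the literals in the tests above.
    longData.setBitVectors("{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 2, 4, 5, 6, 7, 8}");

    ColumnStatisticsData data = new ColumnStatisticsData();
    data.setLongStats(longData);

    ColumnStatisticsObj obj = new ColumnStatisticsObj();
    obj.setColName("col1");    // placeholder column name
    obj.setColType("bigint");  // placeholder column type
    obj.setStatsData(data);
    // Wrap in a ColumnStatistics plus ColumnStatisticsDesc and pass it to
    // updateTableColumnStatistics, as the tests above do.
    return obj;
  }
}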