diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index fd2c924..3c13b63 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -148,6 +148,7 @@ minitez.query.files.shared=acid_globallimit.q,\ metadata_only_queries.q,\ metadata_only_queries_with_filters.q,\ nonmr_fetch_threshold.q,\ + order_null.q,\ optimize_nullscan.q,\ orc_analyze.q,\ orc_merge1.q,\ diff --git metastore/if/hive_metastore.thrift metastore/if/hive_metastore.thrift index 9d8c092..341aa21 100755 --- metastore/if/hive_metastore.thrift +++ metastore/if/hive_metastore.thrift @@ -226,8 +226,9 @@ struct SerDeInfo { // sort order of a column (column name along with asc(1)/desc(0)) struct Order { - 1: string col, // sort column name - 2: i32 order // asc(1) or desc(0) + 1: string col, // sort column name + 2: i32 order, // asc(1) or desc(0) + 3: i32 nullOrder // nulls first(0) or nulls last(1) } // this object holds all the information about skewed table diff --git metastore/scripts/upgrade/derby/033-HIVE-12994.derby.sql metastore/scripts/upgrade/derby/033-HIVE-12994.derby.sql new file mode 100644 index 0000000..f6ec895 --- /dev/null +++ metastore/scripts/upgrade/derby/033-HIVE-12994.derby.sql @@ -0,0 +1 @@ +ALTER TABLE "APP"."SORT_COLS" ADD "NULL_ORDER" INTEGER NOT NULL; diff --git metastore/scripts/upgrade/derby/hive-schema-2.1.0.derby.sql metastore/scripts/upgrade/derby/hive-schema-2.1.0.derby.sql index 660a85a..c4dd9d7 100644 --- metastore/scripts/upgrade/derby/hive-schema-2.1.0.derby.sql +++ metastore/scripts/upgrade/derby/hive-schema-2.1.0.derby.sql @@ -28,7 +28,7 @@ CREATE TABLE "APP"."SERDE_PARAMS" ("SERDE_ID" BIGINT NOT NULL, "PARAM_KEY" VARCH CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(1000) NOT NULL, "TYPE_NAME" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL); -CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); +CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "ORDER" INTEGER NOT NULL, "NULL_ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); CREATE TABLE "APP"."CDS" ("CD_ID" BIGINT NOT NULL); diff --git metastore/scripts/upgrade/derby/upgrade-2.0.0-to-2.1.0.derby.sql metastore/scripts/upgrade/derby/upgrade-2.0.0-to-2.1.0.derby.sql index 30de00b..4058e32 100644 --- metastore/scripts/upgrade/derby/upgrade-2.0.0-to-2.1.0.derby.sql +++ metastore/scripts/upgrade/derby/upgrade-2.0.0-to-2.1.0.derby.sql @@ -1,3 +1,4 @@ -- Upgrade MetaStore schema from 2.0.0 to 2.1.0 +RUN '033-HIVE-12994.derby.sql'; UPDATE "APP".VERSION SET SCHEMA_VERSION='2.1.0', VERSION_COMMENT='Hive release version 2.1.0' where VER_ID=1; diff --git metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/Metastore.java metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/Metastore.java index 416ae9d..ca928b9 100644 --- metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/Metastore.java +++ metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/Metastore.java @@ -447,15 +447,15 @@ public Builder mergeFrom(org.apache.hadoop.hive.metastore.Metastore.SplitInfo ot public final boolean isInitialized() { if (!hasOffset()) { - + return false; } if (!hasLength()) { - + return false; } if (!hasIndex()) { - + return false; } return true; @@ -597,7 +597,7 @@ public Builder 
clearIndex() { /** * repeated .org.apache.hadoop.hive.metastore.SplitInfo infos = 1; */ - java.util.List + java.util.List getInfosList(); /** * repeated .org.apache.hadoop.hive.metastore.SplitInfo infos = 1; @@ -610,7 +610,7 @@ public Builder clearIndex() { /** * repeated .org.apache.hadoop.hive.metastore.SplitInfo infos = 1; */ - java.util.List + java.util.List getInfosOrBuilderList(); /** * repeated .org.apache.hadoop.hive.metastore.SplitInfo infos = 1; @@ -731,7 +731,7 @@ public SplitInfos parsePartialFrom( /** * repeated .org.apache.hadoop.hive.metastore.SplitInfo infos = 1; */ - public java.util.List + public java.util.List getInfosOrBuilderList() { return infos_; } @@ -984,7 +984,7 @@ public Builder mergeFrom(org.apache.hadoop.hive.metastore.Metastore.SplitInfos o infosBuilder_ = null; infos_ = other.infos_; bitField0_ = (bitField0_ & ~0x00000001); - infosBuilder_ = + infosBuilder_ = com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders ? getInfosFieldBuilder() : null; } else { @@ -999,7 +999,7 @@ public Builder mergeFrom(org.apache.hadoop.hive.metastore.Metastore.SplitInfos o public final boolean isInitialized() { for (int i = 0; i < getInfosCount(); i++) { if (!getInfos(i).isInitialized()) { - + return false; } } @@ -1220,7 +1220,7 @@ public Builder removeInfos(int index) { /** * repeated .org.apache.hadoop.hive.metastore.SplitInfo infos = 1; */ - public java.util.List + public java.util.List getInfosOrBuilderList() { if (infosBuilder_ != null) { return infosBuilder_.getMessageOrBuilderList(); @@ -1246,12 +1246,12 @@ public Builder removeInfos(int index) { /** * repeated .org.apache.hadoop.hive.metastore.SplitInfo infos = 1; */ - public java.util.List + public java.util.List getInfosBuilderList() { return getInfosFieldBuilder().getBuilderList(); } private com.google.protobuf.RepeatedFieldBuilder< - org.apache.hadoop.hive.metastore.Metastore.SplitInfo, org.apache.hadoop.hive.metastore.Metastore.SplitInfo.Builder, org.apache.hadoop.hive.metastore.Metastore.SplitInfoOrBuilder> + org.apache.hadoop.hive.metastore.Metastore.SplitInfo, org.apache.hadoop.hive.metastore.Metastore.SplitInfo.Builder, org.apache.hadoop.hive.metastore.Metastore.SplitInfoOrBuilder> getInfosFieldBuilder() { if (infosBuilder_ == null) { infosBuilder_ = new com.google.protobuf.RepeatedFieldBuilder< diff --git metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java index 3b2d7b5..3057fff 100644 --- metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java +++ metastore/src/gen/protobuf/gen-java/org/apache/hadoop/hive/metastore/hbase/HbaseMetastoreProto.java @@ -22582,6 +22582,16 @@ public StorageDescriptor parsePartialFrom( * optional sint32 order = 2 [default = 1]; */ int getOrder(); + + // optional sint32 nullOrder = 3 [default = 0]; + /** + * optional sint32 nullOrder = 3 [default = 0]; + */ + boolean hasNullOrder(); + /** + * optional sint32 nullOrder = 3 [default = 0]; + */ + int getNullOrder(); } /** * Protobuf type {@code org.apache.hadoop.hive.metastore.hbase.StorageDescriptor.Order} @@ -22644,6 +22654,11 @@ private Order( order_ = input.readSInt32(); break; } + case 24: { + bitField0_ |= 0x00000004; + nullOrder_ = input.readSInt32(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -22743,9 +22758,26 @@ public int getOrder() { return order_; } + // optional sint32 nullOrder = 3 
[default = 0]; + public static final int NULLORDER_FIELD_NUMBER = 3; + private int nullOrder_; + /** + * optional sint32 nullOrder = 3 [default = 0]; + */ + public boolean hasNullOrder() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + /** + * optional sint32 nullOrder = 3 [default = 0]; + */ + public int getNullOrder() { + return nullOrder_; + } + private void initFields() { columnName_ = ""; order_ = 1; + nullOrder_ = 0; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -22769,6 +22801,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) if (((bitField0_ & 0x00000002) == 0x00000002)) { output.writeSInt32(2, order_); } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeSInt32(3, nullOrder_); + } getUnknownFields().writeTo(output); } @@ -22786,6 +22821,10 @@ public int getSerializedSize() { size += com.google.protobuf.CodedOutputStream .computeSInt32Size(2, order_); } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeSInt32Size(3, nullOrder_); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -22906,6 +22945,8 @@ public Builder clear() { bitField0_ = (bitField0_ & ~0x00000001); order_ = 1; bitField0_ = (bitField0_ & ~0x00000002); + nullOrder_ = 0; + bitField0_ = (bitField0_ & ~0x00000004); return this; } @@ -22942,6 +22983,10 @@ public Builder clone() { to_bitField0_ |= 0x00000002; } result.order_ = order_; + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.nullOrder_ = nullOrder_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -22966,6 +23011,9 @@ public Builder mergeFrom(org.apache.hadoop.hive.metastore.hbase.HbaseMetastorePr if (other.hasOrder()) { setOrder(other.getOrder()); } + if (other.hasNullOrder()) { + setNullOrder(other.getNullOrder()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -23104,6 +23152,39 @@ public Builder clearOrder() { return this; } + // optional sint32 nullOrder = 3 [default = 0]; + private int nullOrder_ ; + /** + * optional sint32 nullOrder = 3 [default = 0]; + */ + public boolean hasNullOrder() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + /** + * optional sint32 nullOrder = 3 [default = 0]; + */ + public int getNullOrder() { + return nullOrder_; + } + /** + * optional sint32 nullOrder = 3 [default = 0]; + */ + public Builder setNullOrder(int value) { + bitField0_ |= 0x00000004; + nullOrder_ = value; + onChanged(); + return this; + } + /** + * optional sint32 nullOrder = 3 [default = 0]; + */ + public Builder clearNullOrder() { + bitField0_ = (bitField0_ & ~0x00000004); + nullOrder_ = 0; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.metastore.hbase.StorageDescriptor.Order) } @@ -34747,7 +34828,7 @@ public Builder removeRange(int index) { "ant_info\030\001 \003(\01325.org.apache.hadoop.hive." 
+ "metastore.hbase.RoleGrantInfo\"\030\n\010RoleLis", "t\022\014\n\004role\030\001 \003(\t\"/\n\004Role\022\023\n\013create_time\030\001" + - " \001(\003\022\022\n\nowner_name\030\002 \001(\t\"\254\010\n\021StorageDesc" + + " \001(\003\022\022\n\nowner_name\030\002 \001(\t\"\302\010\n\021StorageDesc" + "riptor\022A\n\004cols\030\001 \003(\01323.org.apache.hadoop" + ".hive.metastore.hbase.FieldSchema\022\024\n\014inp" + "ut_format\030\002 \001(\t\022\025\n\routput_format\030\003 \001(\t\022\025" + @@ -34760,51 +34841,51 @@ public Builder removeRange(int index) { "skewed_info\030\t \001(\0132D.org.apache.hadoop.hi" + "ve.metastore.hbase.StorageDescriptor.Ske" + "wedInfo\022!\n\031stored_as_sub_directories\030\n \001" + - "(\010\032.\n\005Order\022\023\n\013column_name\030\001 \002(\t\022\020\n\005orde" + - "r\030\002 \001(\021:\0011\032|\n\tSerDeInfo\022\014\n\004name\030\001 \001(\t\022\031\n" + - "\021serialization_lib\030\002 \001(\t\022F\n\nparameters\030\003" + - " \001(\01322.org.apache.hadoop.hive.metastore." + - "hbase.Parameters\032\214\003\n\nSkewedInfo\022\030\n\020skewe" + - "d_col_names\030\001 \003(\t\022r\n\021skewed_col_values\030\002", - " \003(\0132W.org.apache.hadoop.hive.metastore." + - "hbase.StorageDescriptor.SkewedInfo.Skewe" + - "dColValueList\022\206\001\n\036skewed_col_value_locat" + - "ion_maps\030\003 \003(\0132^.org.apache.hadoop.hive." + - "metastore.hbase.StorageDescriptor.Skewed" + - "Info.SkewedColValueLocationMap\032.\n\022Skewed" + - "ColValueList\022\030\n\020skewed_col_value\030\001 \003(\t\0327" + - "\n\031SkewedColValueLocationMap\022\013\n\003key\030\001 \003(\t" + - "\022\r\n\005value\030\002 \002(\t\"\220\004\n\005Table\022\r\n\005owner\030\001 \001(\t" + - "\022\023\n\013create_time\030\002 \001(\003\022\030\n\020last_access_tim", - "e\030\003 \001(\003\022\021\n\tretention\030\004 \001(\003\022\020\n\010location\030\005" + - " \001(\t\022I\n\rsd_parameters\030\006 \001(\01322.org.apache" + - ".hadoop.hive.metastore.hbase.Parameters\022" + - "\017\n\007sd_hash\030\007 \002(\014\022K\n\016partition_keys\030\010 \003(\013" + - "23.org.apache.hadoop.hive.metastore.hbas" + - "e.FieldSchema\022F\n\nparameters\030\t \001(\01322.org." + - "apache.hadoop.hive.metastore.hbase.Param" + - "eters\022\032\n\022view_original_text\030\n \001(\t\022\032\n\022vie" + - "w_expanded_text\030\013 \001(\t\022\022\n\ntable_type\030\014 \001(" + - "\t\022Q\n\nprivileges\030\r \001(\0132=.org.apache.hadoo", - "p.hive.metastore.hbase.PrincipalPrivileg" + - "eSet\022\024\n\014is_temporary\030\016 \001(\010\"\353\004\n\026Partition" + - "KeyComparator\022\r\n\005names\030\001 \002(\t\022\r\n\005types\030\002 " + - "\002(\t\022S\n\002op\030\003 \003(\0132G.org.apache.hadoop.hive" + - ".metastore.hbase.PartitionKeyComparator." + - "Operator\022S\n\005range\030\004 \003(\0132D.org.apache.had" + - "oop.hive.metastore.hbase.PartitionKeyCom" + - "parator.Range\032(\n\004Mark\022\r\n\005value\030\001 \002(\t\022\021\n\t" + - "inclusive\030\002 \002(\010\032\272\001\n\005Range\022\013\n\003key\030\001 \002(\t\022R" + - "\n\005start\030\002 \001(\0132C.org.apache.hadoop.hive.m", - "etastore.hbase.PartitionKeyComparator.Ma" + - "rk\022P\n\003end\030\003 \001(\0132C.org.apache.hadoop.hive" + - ".metastore.hbase.PartitionKeyComparator." 
+ - "Mark\032\241\001\n\010Operator\022Z\n\004type\030\001 \002(\0162L.org.ap" + - "ache.hadoop.hive.metastore.hbase.Partiti" + - "onKeyComparator.Operator.Type\022\013\n\003key\030\002 \002" + - "(\t\022\013\n\003val\030\003 \002(\t\"\037\n\004Type\022\010\n\004LIKE\020\000\022\r\n\tNOT" + - "EQUALS\020\001*#\n\rPrincipalType\022\010\n\004USER\020\000\022\010\n\004R" + - "OLE\020\001" + "(\010\032D\n\005Order\022\023\n\013column_name\030\001 \002(\t\022\020\n\005orde" + + "r\030\002 \001(\021:\0011\022\024\n\tnullOrder\030\003 \001(\021:\0010\032|\n\tSerD" + + "eInfo\022\014\n\004name\030\001 \001(\t\022\031\n\021serialization_lib" + + "\030\002 \001(\t\022F\n\nparameters\030\003 \001(\01322.org.apache." + + "hadoop.hive.metastore.hbase.Parameters\032\214" + + "\003\n\nSkewedInfo\022\030\n\020skewed_col_names\030\001 \003(\t\022", + "r\n\021skewed_col_values\030\002 \003(\0132W.org.apache." + + "hadoop.hive.metastore.hbase.StorageDescr" + + "iptor.SkewedInfo.SkewedColValueList\022\206\001\n\036" + + "skewed_col_value_location_maps\030\003 \003(\0132^.o" + + "rg.apache.hadoop.hive.metastore.hbase.St" + + "orageDescriptor.SkewedInfo.SkewedColValu" + + "eLocationMap\032.\n\022SkewedColValueList\022\030\n\020sk" + + "ewed_col_value\030\001 \003(\t\0327\n\031SkewedColValueLo" + + "cationMap\022\013\n\003key\030\001 \003(\t\022\r\n\005value\030\002 \002(\t\"\220\004" + + "\n\005Table\022\r\n\005owner\030\001 \001(\t\022\023\n\013create_time\030\002 ", + "\001(\003\022\030\n\020last_access_time\030\003 \001(\003\022\021\n\tretenti" + + "on\030\004 \001(\003\022\020\n\010location\030\005 \001(\t\022I\n\rsd_paramet" + + "ers\030\006 \001(\01322.org.apache.hadoop.hive.metas" + + "tore.hbase.Parameters\022\017\n\007sd_hash\030\007 \002(\014\022K" + + "\n\016partition_keys\030\010 \003(\01323.org.apache.hado" + + "op.hive.metastore.hbase.FieldSchema\022F\n\np" + + "arameters\030\t \001(\01322.org.apache.hadoop.hive" + + ".metastore.hbase.Parameters\022\032\n\022view_orig" + + "inal_text\030\n \001(\t\022\032\n\022view_expanded_text\030\013 " + + "\001(\t\022\022\n\ntable_type\030\014 \001(\t\022Q\n\nprivileges\030\r ", + "\001(\0132=.org.apache.hadoop.hive.metastore.h" + + "base.PrincipalPrivilegeSet\022\024\n\014is_tempora" + + "ry\030\016 \001(\010\"\353\004\n\026PartitionKeyComparator\022\r\n\005n" + + "ames\030\001 \002(\t\022\r\n\005types\030\002 \002(\t\022S\n\002op\030\003 \003(\0132G." + + "org.apache.hadoop.hive.metastore.hbase.P" + + "artitionKeyComparator.Operator\022S\n\005range\030" + + "\004 \003(\0132D.org.apache.hadoop.hive.metastore" + + ".hbase.PartitionKeyComparator.Range\032(\n\004M" + + "ark\022\r\n\005value\030\001 \002(\t\022\021\n\tinclusive\030\002 \002(\010\032\272\001" + + "\n\005Range\022\013\n\003key\030\001 \002(\t\022R\n\005start\030\002 \001(\0132C.or", + "g.apache.hadoop.hive.metastore.hbase.Par" + + "titionKeyComparator.Mark\022P\n\003end\030\003 \001(\0132C." 
+ + "org.apache.hadoop.hive.metastore.hbase.P" + + "artitionKeyComparator.Mark\032\241\001\n\010Operator\022" + + "Z\n\004type\030\001 \002(\0162L.org.apache.hadoop.hive.m" + + "etastore.hbase.PartitionKeyComparator.Op" + + "erator.Type\022\013\n\003key\030\002 \002(\t\022\013\n\003val\030\003 \002(\t\"\037\n" + + "\004Type\022\010\n\004LIKE\020\000\022\r\n\tNOTEQUALS\020\001*#\n\rPrinci" + + "palType\022\010\n\004USER\020\000\022\010\n\004ROLE\020\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -34990,7 +35071,7 @@ public Builder removeRange(int index) { internal_static_org_apache_hadoop_hive_metastore_hbase_StorageDescriptor_Order_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_org_apache_hadoop_hive_metastore_hbase_StorageDescriptor_Order_descriptor, - new java.lang.String[] { "ColumnName", "Order", }); + new java.lang.String[] { "ColumnName", "Order", "NullOrder", }); internal_static_org_apache_hadoop_hive_metastore_hbase_StorageDescriptor_SerDeInfo_descriptor = internal_static_org_apache_hadoop_hive_metastore_hbase_StorageDescriptor_descriptor.getNestedTypes().get(1); internal_static_org_apache_hadoop_hive_metastore_hbase_StorageDescriptor_SerDeInfo_fieldAccessorTable = new diff --git metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp index 81577b6..1a50e03 100644 --- metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp +++ metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp @@ -3101,6 +3101,10 @@ void Order::__set_order(const int32_t val) { this->order = val; } +void Order::__set_nullOrder(const int32_t val) { + this->nullOrder = val; +} + uint32_t Order::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -3138,6 +3142,14 @@ uint32_t Order::read(::apache::thrift::protocol::TProtocol* iprot) { xfer += iprot->skip(ftype); } break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->nullOrder); + this->__isset.nullOrder = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -3163,6 +3175,10 @@ uint32_t Order::write(::apache::thrift::protocol::TProtocol* oprot) const { xfer += oprot->writeI32(this->order); xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldBegin("nullOrder", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->nullOrder); + xfer += oprot->writeFieldEnd(); + xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -3172,17 +3188,20 @@ void swap(Order &a, Order &b) { using ::std::swap; swap(a.col, b.col); swap(a.order, b.order); + swap(a.nullOrder, b.nullOrder); swap(a.__isset, b.__isset); } Order::Order(const Order& other139) { col = other139.col; order = other139.order; + nullOrder = other139.nullOrder; __isset = other139.__isset; } Order& Order::operator=(const Order& other140) { col = other140.col; order = other140.order; + nullOrder = other140.nullOrder; __isset = other140.__isset; return *this; } @@ -3191,6 +3210,7 @@ void Order::printTo(std::ostream& out) const { out << "Order("; out << "col=" << to_string(col); out << ", " << "order=" << to_string(order); + out << ", " << "nullOrder=" << to_string(nullOrder); out << ")"; } diff --git metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h 
metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h index c501ac0..e149a5a 100644 --- metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h +++ metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h @@ -1571,9 +1571,10 @@ inline std::ostream& operator<<(std::ostream& out, const SerDeInfo& obj) } typedef struct _Order__isset { - _Order__isset() : col(false), order(false) {} + _Order__isset() : col(false), order(false), nullOrder(false) {} bool col :1; bool order :1; + bool nullOrder :1; } _Order__isset; class Order { @@ -1581,12 +1582,13 @@ class Order { Order(const Order&); Order& operator=(const Order&); - Order() : col(), order(0) { + Order() : col(), order(0), nullOrder(0) { } virtual ~Order() throw(); std::string col; int32_t order; + int32_t nullOrder; _Order__isset __isset; @@ -1594,12 +1596,16 @@ class Order { void __set_order(const int32_t val); + void __set_nullOrder(const int32_t val); + bool operator == (const Order & rhs) const { if (!(col == rhs.col)) return false; if (!(order == rhs.order)) return false; + if (!(nullOrder == rhs.nullOrder)) + return false; return true; } bool operator != (const Order &rhs) const { diff --git metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Order.java metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Order.java index cc0e2dd..fd05de5 100644 --- metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Order.java +++ metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Order.java @@ -40,6 +40,7 @@ private static final org.apache.thrift.protocol.TField COL_FIELD_DESC = new org.apache.thrift.protocol.TField("col", org.apache.thrift.protocol.TType.STRING, (short)1); private static final org.apache.thrift.protocol.TField ORDER_FIELD_DESC = new org.apache.thrift.protocol.TField("order", org.apache.thrift.protocol.TType.I32, (short)2); + private static final org.apache.thrift.protocol.TField NULL_ORDER_FIELD_DESC = new org.apache.thrift.protocol.TField("nullOrder", org.apache.thrift.protocol.TType.I32, (short)3); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -49,11 +50,13 @@ private String col; // required private int order; // required + private int nullOrder; // required /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { COL((short)1, "col"), - ORDER((short)2, "order"); + ORDER((short)2, "order"), + NULL_ORDER((short)3, "nullOrder"); private static final Map byName = new HashMap(); @@ -72,6 +75,8 @@ public static _Fields findByThriftId(int fieldId) { return COL; case 2: // ORDER return ORDER; + case 3: // NULL_ORDER + return NULL_ORDER; default: return null; } @@ -113,6 +118,7 @@ public String getFieldName() { // isset id assignments private static final int __ORDER_ISSET_ID = 0; + private static final int __NULLORDER_ISSET_ID = 1; private byte __isset_bitfield = 0; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { @@ -121,6 +127,8 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); tmpMap.put(_Fields.ORDER, new org.apache.thrift.meta_data.FieldMetaData("order", org.apache.thrift.TFieldRequirementType.DEFAULT, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); + tmpMap.put(_Fields.NULL_ORDER, new org.apache.thrift.meta_data.FieldMetaData("nullOrder", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(Order.class, metaDataMap); } @@ -130,12 +138,15 @@ public Order() { public Order( String col, - int order) + int order, + int nullOrder) { this(); this.col = col; this.order = order; setOrderIsSet(true); + this.nullOrder = nullOrder; + setNullOrderIsSet(true); } /** @@ -147,6 +158,7 @@ public Order(Order other) { this.col = other.col; } this.order = other.order; + this.nullOrder = other.nullOrder; } public Order deepCopy() { @@ -158,6 +170,8 @@ public void clear() { this.col = null; setOrderIsSet(false); this.order = 0; + setNullOrderIsSet(false); + this.nullOrder = 0; } public String getCol() { @@ -205,6 +219,28 @@ public void setOrderIsSet(boolean value) { __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __ORDER_ISSET_ID, value); } + public int getNullOrder() { + return this.nullOrder; + } + + public void setNullOrder(int nullOrder) { + this.nullOrder = nullOrder; + setNullOrderIsSet(true); + } + + public void unsetNullOrder() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __NULLORDER_ISSET_ID); + } + + /** Returns true if field nullOrder is set (has been assigned a value) and false otherwise */ + public boolean isSetNullOrder() { + return EncodingUtils.testBit(__isset_bitfield, __NULLORDER_ISSET_ID); + } + + public void setNullOrderIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NULLORDER_ISSET_ID, value); + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case COL: @@ -223,6 +259,14 @@ public void setFieldValue(_Fields field, Object value) { } break; + case NULL_ORDER: + if (value == null) { + unsetNullOrder(); + } else { + setNullOrder((Integer)value); + } + break; + } } @@ -234,6 +278,9 @@ public Object getFieldValue(_Fields field) { case ORDER: return getOrder(); + case NULL_ORDER: + return getNullOrder(); + } throw new IllegalStateException(); } @@ -249,6 +296,8 @@ public boolean isSet(_Fields field) { return isSetCol(); case ORDER: return isSetOrder(); + case NULL_ORDER: + return isSetNullOrder(); } throw new IllegalStateException(); } @@ -284,6 +333,15 @@ public boolean 
equals(Order that) { return false; } + boolean this_present_nullOrder = true; + boolean that_present_nullOrder = true; + if (this_present_nullOrder || that_present_nullOrder) { + if (!(this_present_nullOrder && that_present_nullOrder)) + return false; + if (this.nullOrder != that.nullOrder) + return false; + } + return true; } @@ -301,6 +359,11 @@ public int hashCode() { if (present_order) list.add(order); + boolean present_nullOrder = true; + list.add(present_nullOrder); + if (present_nullOrder) + list.add(nullOrder); + return list.hashCode(); } @@ -332,6 +395,16 @@ public int compareTo(Order other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetNullOrder()).compareTo(other.isSetNullOrder()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetNullOrder()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nullOrder, other.nullOrder); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -363,6 +436,10 @@ public String toString() { sb.append("order:"); sb.append(this.order); first = false; + if (!first) sb.append(", "); + sb.append("nullOrder:"); + sb.append(this.nullOrder); + first = false; sb.append(")"); return sb.toString(); } @@ -424,6 +501,14 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Order struct) throw org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 3: // NULL_ORDER + if (schemeField.type == org.apache.thrift.protocol.TType.I32) { + struct.nullOrder = iprot.readI32(); + struct.setNullOrderIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -445,6 +530,9 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, Order struct) thro oprot.writeFieldBegin(ORDER_FIELD_DESC); oprot.writeI32(struct.order); oprot.writeFieldEnd(); + oprot.writeFieldBegin(NULL_ORDER_FIELD_DESC); + oprot.writeI32(struct.nullOrder); + oprot.writeFieldEnd(); oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -469,19 +557,25 @@ public void write(org.apache.thrift.protocol.TProtocol prot, Order struct) throw if (struct.isSetOrder()) { optionals.set(1); } - oprot.writeBitSet(optionals, 2); + if (struct.isSetNullOrder()) { + optionals.set(2); + } + oprot.writeBitSet(optionals, 3); if (struct.isSetCol()) { oprot.writeString(struct.col); } if (struct.isSetOrder()) { oprot.writeI32(struct.order); } + if (struct.isSetNullOrder()) { + oprot.writeI32(struct.nullOrder); + } } @Override public void read(org.apache.thrift.protocol.TProtocol prot, Order struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(2); + BitSet incoming = iprot.readBitSet(3); if (incoming.get(0)) { struct.col = iprot.readString(); struct.setColIsSet(true); @@ -490,6 +584,10 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Order struct) throws struct.order = iprot.readI32(); struct.setOrderIsSet(true); } + if (incoming.get(2)) { + struct.nullOrder = iprot.readI32(); + struct.setNullOrderIsSet(true); + } } } diff --git metastore/src/gen/thrift/gen-php/metastore/Types.php metastore/src/gen/thrift/gen-php/metastore/Types.php index 57d1daf..9f35eb6 100644 --- metastore/src/gen/thrift/gen-php/metastore/Types.php +++ metastore/src/gen/thrift/gen-php/metastore/Types.php @@ -3076,6 +3076,10 @@ class Order { * @var int */ public $order = null; + /** + * @var int + */ + public $nullOrder 
= null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -3088,6 +3092,10 @@ class Order { 'var' => 'order', 'type' => TType::I32, ), + 3 => array( + 'var' => 'nullOrder', + 'type' => TType::I32, + ), ); } if (is_array($vals)) { @@ -3097,6 +3105,9 @@ class Order { if (isset($vals['order'])) { $this->order = $vals['order']; } + if (isset($vals['nullOrder'])) { + $this->nullOrder = $vals['nullOrder']; + } } } @@ -3133,6 +3144,13 @@ class Order { $xfer += $input->skip($ftype); } break; + case 3: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->nullOrder); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -3156,6 +3174,11 @@ class Order { $xfer += $output->writeI32($this->order); $xfer += $output->writeFieldEnd(); } + if ($this->nullOrder !== null) { + $xfer += $output->writeFieldBegin('nullOrder', TType::I32, 3); + $xfer += $output->writeI32($this->nullOrder); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; diff --git metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py index 77dd9a6..cfb8fb6 100644 --- metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -2237,17 +2237,20 @@ class Order: Attributes: - col - order + - nullOrder """ thrift_spec = ( None, # 0 (1, TType.STRING, 'col', None, None, ), # 1 (2, TType.I32, 'order', None, None, ), # 2 + (3, TType.I32, 'nullOrder', None, None, ), # 3 ) - def __init__(self, col=None, order=None,): + def __init__(self, col=None, order=None, nullOrder=None,): self.col = col self.order = order + self.nullOrder = nullOrder def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -2268,6 +2271,11 @@ def read(self, iprot): self.order = iprot.readI32() else: iprot.skip(ftype) + elif fid == 3: + if ftype == TType.I32: + self.nullOrder = iprot.readI32() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -2286,6 +2294,10 @@ def write(self, oprot): oprot.writeFieldBegin('order', TType.I32, 2) oprot.writeI32(self.order) oprot.writeFieldEnd() + if self.nullOrder is not None: + oprot.writeFieldBegin('nullOrder', TType.I32, 3) + oprot.writeI32(self.nullOrder) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -2297,6 +2309,7 @@ def __hash__(self): value = 17 value = (value * 31) ^ hash(self.col) value = (value * 31) ^ hash(self.order) + value = (value * 31) ^ hash(self.nullOrder) return value def __repr__(self): diff --git metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb index 2cf433b..0a812cf 100644 --- metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -557,10 +557,12 @@ class Order include ::Thrift::Struct, ::Thrift::Struct_Union COL = 1 ORDER = 2 + NULLORDER = 3 FIELDS = { COL => {:type => ::Thrift::Types::STRING, :name => 'col'}, - ORDER => {:type => ::Thrift::Types::I32, :name => 'order'} + ORDER => {:type => ::Thrift::Types::I32, :name => 'order'}, + NULLORDER => {:type => ::Thrift::Types::I32, :name => 'nullOrder'} } def struct_fields; FIELDS; end diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java 
metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index be54b9c..ed20cbd 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -677,14 +677,16 @@ public void apply(StorageDescriptor t, Object[] fields) { t.setParameters(MetaStoreUtils.trimMapNulls(t.getParameters(), convertMapNullsToEmptyStrings)); } - queryText = "select \"SD_ID\", \"COLUMN_NAME\", \"SORT_COLS\".\"ORDER\" from \"SORT_COLS\"" + queryText = "select \"SD_ID\", \"COLUMN_NAME\", \"SORT_COLS\".\"ORDER\", \"SORT_COLS\".\"NULL_ORDER\"" + + " from \"SORT_COLS\"" + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0" + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc"; loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc() { @Override public void apply(StorageDescriptor t, Object[] fields) { if (fields[2] == null) return; - t.addToSortCols(new Order((String)fields[1], extractSqlInt(fields[2]))); + assert fields[3] != null; + t.addToSortCols(new Order((String)fields[1], extractSqlInt(fields[2]), extractSqlInt(fields[3]))); }}); queryText = "select \"SD_ID\", \"BUCKET_COL_NAME\" from \"BUCKETING_COLS\"" diff --git metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index b808728..9f74290 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -1344,7 +1344,8 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, if (keys != null) { mkeys = new ArrayList(keys.size()); for (Order part : keys) { - mkeys.add(new MOrder(HiveStringUtils.normalizeIdentifier(part.getCol()), part.getOrder())); + mkeys.add(new MOrder(HiveStringUtils.normalizeIdentifier(part.getCol()), part.getOrder(), + part.getNullOrder())); } } return mkeys; @@ -1355,7 +1356,7 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, if (mkeys != null) { keys = new ArrayList(mkeys.size()); for (MOrder part : mkeys) { - keys.add(new Order(part.getCol(), part.getOrder())); + keys.add(new Order(part.getCol(), part.getOrder(), part.getNullOrder())); } } return keys; diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java index d6d01bd..a16997b 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java @@ -707,7 +707,7 @@ static StorageDescriptor deserializeStorageDescriptor(byte[] serialized) sd.setBucketCols(new ArrayList<>(proto.getBucketColsList())); List sortCols = new ArrayList<>(); for (HbaseMetastoreProto.StorageDescriptor.Order protoOrder : proto.getSortColsList()) { - sortCols.add(new Order(protoOrder.getColumnName(), protoOrder.getOrder())); + sortCols.add(new Order(protoOrder.getColumnName(), protoOrder.getOrder(), protoOrder.getNullOrder())); } sd.setSortCols(sortCols); if (proto.hasSkewedInfo()) { diff --git metastore/src/model/org/apache/hadoop/hive/metastore/model/MOrder.java metastore/src/model/org/apache/hadoop/hive/metastore/model/MOrder.java index 1fa82a4..5370c02 100644 --- metastore/src/model/org/apache/hadoop/hive/metastore/model/MOrder.java +++ metastore/src/model/org/apache/hadoop/hive/metastore/model/MOrder.java @@ -21,14 +21,16 @@ public class MOrder { private String col; private 
int order; - + private int nullOrder; + /** * @param col * @param order */ - public MOrder(String col, int order) { + public MOrder(String col, int order, int nullOrder) { this.col = col; this.order = order; + this.nullOrder = nullOrder; } /** @@ -58,5 +60,19 @@ public int getOrder() { public void setOrder(int order) { this.order = order; } - + + /** + * @return the null order + */ + public int getNullOrder() { + return nullOrder; + } + + /** + * @param nullOrder the null order to set + */ + public void setNullOrder(int nullOrder) { + this.nullOrder = nullOrder; + } + } diff --git metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto index 466fdf9..552097b 100644 --- metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto +++ metastore/src/protobuf/org/apache/hadoop/hive/metastore/hbase/hbase_metastore_proto.proto @@ -205,6 +205,7 @@ message StorageDescriptor { message Order { required string column_name = 1; optional sint32 order = 2 [default = 1]; + optional sint32 nullOrder = 3 [default = 0]; } message SerDeInfo { diff --git metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java index 29d5a64..d938a03 100644 --- metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java +++ metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java @@ -18,8 +18,16 @@ */ package org.apache.hadoop.hive.metastore.hbase; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.HTableInterface; import org.apache.hadoop.hive.conf.HiveConf; @@ -58,16 +66,8 @@ import org.junit.rules.ExpectedException; import org.mockito.Mock; import org.mockito.MockitoAnnotations; - -import java.io.IOException; -import java.security.MessageDigest; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * @@ -402,7 +402,7 @@ public void createTable() throws Exception { Map params = new HashMap(); params.put("key", "value"); StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17, - serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params); + serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1, 0)), params); Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, emptyParameters, null, null, null); store.createTable(table); @@ -424,6 +424,7 @@ public void createTable() throws Exception { Assert.assertEquals(1, t.getSd().getSortColsSize()); Assert.assertEquals("sortcol", t.getSd().getSortCols().get(0).getCol()); Assert.assertEquals(1, t.getSd().getSortCols().get(0).getOrder()); + Assert.assertEquals(0, t.getSd().getSortCols().get(0).getNullOrder()); Assert.assertEquals(1, t.getSd().getParametersSize()); Assert.assertEquals("value", t.getSd().getParameters().get("key")); Assert.assertEquals("me", t.getOwner()); @@ 
-1273,7 +1274,7 @@ private Table createMockTableAndPartition(String partType, String partVal) throw Map params = new HashMap(); params.put("key", "value"); StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17, - serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params); + serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1, 0)), params); int currentTime = (int)(System.currentTimeMillis() / 1000); Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols, emptyParameters, null, null, null); @@ -1291,7 +1292,7 @@ private Table createMockTable(String type) throws Exception { Map params = new HashMap(); params.put("key", "value"); StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17, - serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params); + serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1, 0)), params); int currentTime = (int)(System.currentTimeMillis() / 1000); Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols, emptyParameters, null, null, null); diff --git metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java index b0d7662..570d023 100644 --- metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java +++ metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java @@ -18,34 +18,30 @@ */ package org.apache.hadoop.hive.metastore.hbase; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.client.HTableInterface; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; -import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Function; -import org.apache.hadoop.hive.metastore.api.FunctionType; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.PrincipalType; -import org.apache.hadoop.hive.metastore.api.ResourceType; -import org.apache.hadoop.hive.metastore.api.ResourceUri; -import org.apache.hadoop.hive.metastore.api.Role; import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import 
org.apache.hadoop.hive.metastore.api.StringColumnStatsData; import org.apache.hadoop.hive.metastore.api.Table; @@ -58,16 +54,8 @@ import org.junit.rules.ExpectedException; import org.mockito.Mock; import org.mockito.MockitoAnnotations; - -import java.io.IOException; -import java.security.MessageDigest; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * @@ -595,7 +583,7 @@ private Table createMockTableAndPartition(String partType, String partVal) throw Map params = new HashMap(); params.put("key", "value"); StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17, - serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params); + serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1, 0)), params); int currentTime = (int)(System.currentTimeMillis() / 1000); Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols, emptyParameters, null, null, null); diff --git metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestSharedStorageDescriptor.java metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestSharedStorageDescriptor.java index e0d8ce4..8e856a1 100644 --- metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestSharedStorageDescriptor.java +++ metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestSharedStorageDescriptor.java @@ -18,8 +18,9 @@ */ package org.apache.hadoop.hive.metastore.hbase; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.Iterator; +import java.util.List; + import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.SerDeInfo; @@ -27,10 +28,8 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.junit.Assert; import org.junit.Test; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** @@ -80,19 +79,22 @@ public void changeOnUnset() { @Test public void changeOrder() { StorageDescriptor sd = new StorageDescriptor(); - sd.addToSortCols(new Order("fred", 1)); + sd.addToSortCols(new Order("fred", 1, 0)); SharedStorageDescriptor ssd = new SharedStorageDescriptor(); ssd.setShared(sd); ssd.getSortCols().get(0).setOrder(2); + ssd.getSortCols().get(0).setNullOrder(3); Assert.assertFalse(sd.getSortCols() == ssd.getSortCols()); Assert.assertEquals(2, ssd.getSortCols().get(0).getOrder()); Assert.assertEquals(1, sd.getSortCols().get(0).getOrder()); + Assert.assertEquals(3, ssd.getSortCols().get(0).getNullOrder()); + Assert.assertEquals(0, sd.getSortCols().get(0).getNullOrder()); } @Test public void unsetOrder() { StorageDescriptor sd = new StorageDescriptor(); - sd.addToSortCols(new Order("fred", 1)); + sd.addToSortCols(new Order("fred", 1, 0)); SharedStorageDescriptor ssd = new SharedStorageDescriptor(); ssd.setShared(sd); ssd.unsetSortCols(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index be6ea63..b1404ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -18,18 +18,39 @@ package org.apache.hadoop.hive.ql.exec; -import com.google.common.collect.ImmutableList; -import 
com.google.common.collect.Iterables; +import static org.apache.commons.lang.StringUtils.join; +import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; +import static org.apache.hadoop.util.StringUtils.stringifyException; + +import java.io.BufferedWriter; +import java.io.DataOutputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Serializable; +import java.io.Writer; +import java.net.URI; +import java.net.URISyntaxException; +import java.sql.SQLException; +import java.util.AbstractList; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcSerde; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.mapreduce.MRJobConfig; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -77,6 +98,8 @@ import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.merge.MergeFileTask; import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask; import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork; @@ -177,7 +200,6 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; -import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeSpec; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe; @@ -194,42 +216,17 @@ import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatus; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.tools.HadoopArchives; import org.apache.hadoop.util.ToolRunner; import org.apache.hive.common.util.AnnotationUtils; import org.apache.hive.common.util.ReflectionUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.stringtemplate.v4.ST; -import java.io.BufferedWriter; -import java.io.DataOutputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Serializable; -import java.io.Writer; -import java.net.URI; -import java.net.URISyntaxException; -import java.sql.SQLException; -import java.util.AbstractList; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; 
-import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; - -import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; -import static org.apache.commons.lang.StringUtils.join; -import static org.apache.hadoop.util.StringUtils.stringifyException; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; /** * DDLTask implementation. @@ -2143,6 +2140,11 @@ private int showCreateTable(Hive db, DataOutputStream outStream, String tableNam else if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC) { sortKeyDesc = sortKeyDesc + "DESC"; } + if (sortCol.getNullOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_NULLS_FIRST) { + sortKeyDesc = sortKeyDesc + " NULLS FIRST"; + } else if (sortCol.getNullOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_NULLS_LAST) { + sortKeyDesc = sortKeyDesc + " NULLS LAST"; + } sortKeys.add(sortKeyDesc); } tbl_sort_bucket += StringUtils.join(sortKeys, ", \n"); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index 91b5ca7..3cb21d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -648,6 +648,8 @@ protected void reloadHashTable(byte pos, int partitionId) spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap); spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi()); spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders()); + spilledMapJoinTables[pos].setNullMarkers(container.getNullMarkers()); + spilledMapJoinTables[pos].setNotNullMarkers(container.getNotNullMarkers()); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index e9cd450..2ba37b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -587,7 +587,9 @@ private void updatePartitionBucketSortColumns(Table table, Partition partn, newSortCols.add(new Order( partn.getCols().get(sortCol.getIndexes().get(0)).getName(), sortCol.getSortOrder() == '+' ? BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC : - BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC)); + BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC, + sortCol.getSortOrder() == 'a' ? BaseSemanticAnalyzer.HIVE_COLUMN_NULLS_FIRST : + BaseSemanticAnalyzer.HIVE_COLUMN_NULLS_LAST)); } else { // If the table is sorted on a partition column, not valid for sorting updateSortCols = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index fdc1dff..f6471db 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -91,6 +91,8 @@ /** The OI used to deserialize values. We never deserialize keys. 
*/ private LazyBinaryStructObjectInspector internalValueOi; private boolean[] sortableSortOrders; + private byte[] nullMarkers; + private byte[] notNullMarkers; private MapJoinBytesTableContainer.KeyValueHelper writeHelper; private final MapJoinBytesTableContainer.DirectKeyValueWriter directWriteHelper; /* @@ -417,6 +419,14 @@ public LazyBinaryStructObjectInspector getInternalValueOi() { return sortableSortOrders; } + public byte[] getNullMarkers() { + return nullMarkers; + } + + public byte[] getNotNullMarkers() { + return notNullMarkers; + } + /* For a given row, put it into proper partition based on its hash value. * When memory threshold is reached, the biggest hash table in memory will be spilled to disk. * If the hash table of a specific partition is already on disk, all later rows will be put into @@ -708,7 +718,8 @@ public GetAdaptor() { nulls[i] = currentKey[i] == null; } return currentValue.setFromOutput( - MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, sortableSortOrders)); + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers)); } @Override @@ -723,7 +734,8 @@ public GetAdaptor() { nulls[keyIndex] = currentKey[keyIndex] == null; } return currentValue.setFromOutput( - MapJoinKey.serializeRow(output, currentKey, ois, sortableSortOrders)); + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers)); } @Override @@ -1064,6 +1076,12 @@ public void setSerde(MapJoinObjectSerDeContext keyCtx, MapJoinObjectSerDeContext if (sortableSortOrders == null) { sortableSortOrders = ((BinarySortableSerDe) keySerde).getSortOrders(); } + if (nullMarkers == null) { + nullMarkers = ((BinarySortableSerDe) keySerde).getNullMarkers(); + } + if (notNullMarkers == null) { + notNullMarkers = ((BinarySortableSerDe) keySerde).getNotNullMarkers(); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java index 5c2ff92..a8aa71a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java @@ -25,11 +25,8 @@ import java.util.Collections; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.debug.Utils; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; @@ -53,9 +50,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -63,6 +60,8 @@ import 
org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.HashCodeUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Table container that serializes keys and values using LazyBinarySerDe into @@ -83,6 +82,8 @@ * ordering. Hence, remember the ordering here; it is null if we do use LazyBinarySerDe. */ private boolean[] sortableSortOrders; + private byte[] nullMarkers; + private byte[] notNullMarkers; private KeyValueHelper writeHelper; private DirectKeyValueWriter directWriteHelper; @@ -138,6 +139,14 @@ public void setSortableSortOrders(boolean[] sortableSortOrders) { this.sortableSortOrders = sortableSortOrders; } + public void setNullMarkers(byte[] nullMarkers) { + this.nullMarkers = nullMarkers; + } + + public void setNotNullMarkers(byte[] notNullMarkers) { + this.notNullMarkers = notNullMarkers; + } + public static interface KeyValueHelper extends BytesBytesMultiHashMap.KvSource { void setKeyValue(Writable key, Writable val) throws SerDeException; /** Get hash value from the key. */ @@ -269,7 +278,14 @@ private void sanityCheckKeyForTag() throws SerDeException { fois.add(fields.get(i).getFieldObjectInspector()); } Output output = new Output(); - BinarySortableSerDe.serializeStruct(output, data, fois, new boolean[fields.size()]); + boolean[] sortableSortOrders = new boolean[fields.size()]; + Arrays.fill(sortableSortOrders, false); + byte[] columnNullMarker = new byte[fields.size()]; + Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO); + byte[] columnNotNullMarker = new byte[fields.size()]; + Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE); + BinarySortableSerDe.serializeStruct(output, data, fois, sortableSortOrders, + columnNullMarker, columnNotNullMarker); hasTag = (output.getLength() != b.getLength()); if (hasTag) { LOG.error("Tag found in keys and will be removed. 
This should not happen."); @@ -360,10 +376,14 @@ public void setSerde(MapJoinObjectSerDeContext keyContext, MapJoinObjectSerDeCon writeHelper = new LazyBinaryKvWriter(keySerde, valSoi, valueContext.hasFilterTag()); internalValueOi = valSoi; sortableSortOrders = ((BinarySortableSerDe) keySerde).getSortOrders(); + nullMarkers = ((BinarySortableSerDe) keySerde).getNullMarkers(); + notNullMarkers = ((BinarySortableSerDe) keySerde).getNotNullMarkers(); } else { writeHelper = new KeyValueWriter(keySerde, valSerde, valueContext.hasFilterTag()); internalValueOi = createInternalOi(valueContext); sortableSortOrders = null; + nullMarkers = null; + notNullMarkers = null; } } } @@ -476,7 +496,8 @@ public GetAdaptor() { nulls[i] = currentKey[i] == null; } return currentValue.setFromOutput( - MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, sortableSortOrders)); + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers)); } @Override @@ -491,7 +512,8 @@ public GetAdaptor() { nulls[keyIndex] = currentKey[keyIndex] == null; } return currentValue.setFromOutput( - MapJoinKey.serializeRow(output, currentKey, ois, sortableSortOrders)); + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers)); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java index cfb9abc..9f27f56 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java @@ -117,7 +117,8 @@ public static MapJoinKey readFromVector(Output output, MapJoinKey key, Object[] */ public static Output serializeVector(Output byteStream, VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, - boolean[] nulls, boolean[] sortableSortOrders) throws HiveException, SerDeException { + boolean[] nulls, boolean[] sortableSortOrders, byte[] nullMarkers, byte[] notNullMarkers) + throws HiveException, SerDeException { Object[] fieldData = new Object[keyOutputWriters.length]; List fieldOis = new ArrayList(); for (int i = 0; i < keyOutputWriters.length; ++i) { @@ -130,7 +131,8 @@ public static Output serializeVector(Output byteStream, VectorHashKeyWrapper kw, nulls[i] = (fieldData[i] == null); } } - return serializeRow(byteStream, fieldData, fieldOis, sortableSortOrders); + return serializeRow(byteStream, fieldData, fieldOis, sortableSortOrders, + nullMarkers, notNullMarkers); } public static MapJoinKey readFromRow(Output output, MapJoinKey key, Object[] keyObject, @@ -145,7 +147,8 @@ public static MapJoinKey readFromRow(Output output, MapJoinKey key, Object[] key * @param byteStream Output to reuse. Can be null, in that case a new one would be created. 
*/ public static Output serializeRow(Output byteStream, Object[] fieldData, - List fieldOis, boolean[] sortableSortOrders) throws HiveException { + List fieldOis, boolean[] sortableSortOrders, + byte[] nullMarkers, byte[] notNullMarkers) throws HiveException { if (byteStream == null) { byteStream = new Output(); } else { @@ -157,7 +160,8 @@ public static Output serializeRow(Output byteStream, Object[] fieldData, } else if (sortableSortOrders == null) { LazyBinarySerDe.serializeStruct(byteStream, fieldData, fieldOis); } else { - BinarySortableSerDe.serializeStruct(byteStream, fieldData, fieldOis, sortableSortOrders); + BinarySortableSerDe.serializeStruct(byteStream, fieldData, fieldOis, sortableSortOrders, + nullMarkers, notNullMarkers); } } catch (SerDeException e) { throw new HiveException("Serialization error", e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java index 7bdd11a..fcf18c1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java @@ -27,9 +27,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.Counter; import org.apache.hadoop.hive.ql.exec.TerminalOperator; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.Counter; import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; @@ -39,14 +39,15 @@ import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -188,6 +189,60 @@ public VectorReduceSinkCommonOperator(CompilationOpContext ctx, return columnSortOrderIsDesc; } + private byte[] getColumnNullMarker(Properties properties, int columnCount, boolean[] columnSortOrder) { + String columnNullOrder = properties.getProperty(serdeConstants.SERIALIZATION_NULL_POSITION); + byte[] columnNullMarker = new byte[columnCount]; + for (int i = 0; i < columnNullMarker.length; i++) { + if (columnSortOrder[i]) { + // Descending + if (columnNullOrder != null && columnNullOrder.charAt(i) == 'a') { + // Null first + columnNullMarker[i] = BinarySortableSerDe.ONE; + } else { + // Null last (default for descending order) + columnNullMarker[i] = BinarySortableSerDe.ZERO; + } + } 
else { + // Ascending + if (columnNullOrder != null && columnNullOrder.charAt(i) == 'z') { + // Null last + columnNullMarker[i] = BinarySortableSerDe.ONE; + } else { + // Null first (default for ascending order) + columnNullMarker[i] = BinarySortableSerDe.ZERO; + } + } + } + return columnNullMarker; + } + + private byte[] getColumnNotNullMarker(Properties properties, int columnCount, boolean[] columnSortOrder) { + String columnNullOrder = properties.getProperty(serdeConstants.SERIALIZATION_NULL_POSITION); + byte[] columnNotNullMarker = new byte[columnCount]; + for (int i = 0; i < columnNotNullMarker.length; i++) { + if (columnSortOrder[i]) { + // Descending + if (columnNullOrder != null && columnNullOrder.charAt(i) == 'a') { + // Null first + columnNotNullMarker[i] = BinarySortableSerDe.ZERO; + } else { + // Null last (default for descending order) + columnNotNullMarker[i] = BinarySortableSerDe.ONE; + } + } else { + // Ascending + if (columnNullOrder != null && columnNullOrder.charAt(i) == 'z') { + // Null last + columnNotNullMarker[i] = BinarySortableSerDe.ZERO; + } else { + // Null first (default for ascending order) + columnNotNullMarker[i] = BinarySortableSerDe.ONE; + } + } + } + return columnNotNullMarker; + } + @Override protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); @@ -217,8 +272,13 @@ protected void initializeOp(Configuration hconf) throws HiveException { TableDesc keyTableDesc = conf.getKeySerializeInfo(); boolean[] columnSortOrder = getColumnSortOrder(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length); + byte[] columnNullMarker = + getColumnNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder); + byte[] columnNotNullMarker = + getColumnNotNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder); - keyBinarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrder); + keyBinarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrder, + columnNullMarker, columnNotNullMarker); // Create all nulls key. 
try { diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 75d2519..71be225 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -936,7 +936,7 @@ public void createIndex(String tableName, String indexName, String indexHandlerC FieldSchema col = cols.get(i); if (indexedCols.contains(col.getName())) { indexTblCols.add(col); - sortCols.add(new Order(col.getName(), 1)); + sortCols.add(new Order(col.getName(), 1, 0)); k++; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java index b57dc77..677649d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java @@ -357,7 +357,8 @@ private boolean checkSortColsAndJoinCols(List sortCols, Order o = sortCols.get(pos); if (pos < sortColumnsFirstPartition.size()) { - if (o.getOrder() != sortColumnsFirstPartition.get(pos).getOrder()) { + if (o.getOrder() != sortColumnsFirstPartition.get(pos).getOrder() || + o.getNullOrder() != sortColumnsFirstPartition.get(pos).getNullOrder()) { return false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java index d5f3057..3d580d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java @@ -148,23 +148,55 @@ public BucketSortReduceSinkProcessor(ParseContext pGraphContext) { // Get the sort positions and sort order for the table // The sort order contains whether the sorting is happening ascending or descending - private ObjectPair, List> getSortPositionsOrder( + private List getSortPositions( List tabSortCols, List tabCols) { List sortPositions = new ArrayList(); - List sortOrders = new ArrayList(); for (Order sortCol : tabSortCols) { int pos = 0; for (FieldSchema tabCol : tabCols) { if (sortCol.getCol().equals(tabCol.getName())) { sortPositions.add(pos); + break; + } + pos++; + } + } + return sortPositions; + } + + private List getSortOrder( + List tabSortCols, + List tabCols) { + List sortOrders = new ArrayList(); + for (Order sortCol : tabSortCols) { + int pos = 0; + for (FieldSchema tabCol : tabCols) { + if (sortCol.getCol().equals(tabCol.getName())) { sortOrders.add(sortCol.getOrder()); break; } pos++; } } - return new ObjectPair, List>(sortPositions, sortOrders); + return sortOrders; + } + + private List getNullSortOrder( + List tabSortCols, + List tabCols) { + List nullSortOrders = new ArrayList(); + for (Order sortCol : tabSortCols) { + int pos = 0; + for (FieldSchema tabCol : tabCols) { + if (sortCol.getCol().equals(tabCol.getName())) { + nullSortOrders.add(sortCol.getNullOrder()); + break; + } + pos++; + } + } + return nullSortOrders; } // Return true if the partition is bucketed/sorted by the specified positions @@ -174,6 +206,7 @@ private boolean checkPartition(Partition partition, List bucketPositionsDest, List sortPositionsDest, List sortOrderDest, + List sortNullOrderDest, int numBucketsDest) { // The bucketing and sorting positions should exactly match int numBuckets = partition.getBucketCount(); @@ -183,11 +216,16 @@ private boolean checkPartition(Partition partition, List 
partnBucketPositions = getBucketPositions(partition.getBucketCols(), partition.getTable().getCols()); - ObjectPair, List> partnSortPositionsOrder = - getSortPositionsOrder(partition.getSortCols(), partition.getTable().getCols()); + List sortPositions = + getSortPositions(partition.getSortCols(), partition.getTable().getCols()); + List sortOrder = + getSortOrder(partition.getSortCols(), partition.getTable().getCols()); + List sortNullOrder = + getNullSortOrder(partition.getSortCols(), partition.getTable().getCols()); return bucketPositionsDest.equals(partnBucketPositions) && - sortPositionsDest.equals(partnSortPositionsOrder.getFirst()) && - sortOrderDest.equals(partnSortPositionsOrder.getSecond()); + sortPositionsDest.equals(sortPositions) && + sortOrderDest.equals(sortOrder) && + sortNullOrderDest.equals(sortNullOrder); } // Return true if the table is bucketed/sorted by the specified positions @@ -197,6 +235,7 @@ private boolean checkTable(Table table, List bucketPositionsDest, List sortPositionsDest, List sortOrderDest, + List sortNullOrderDest, int numBucketsDest) { // The bucketing and sorting positions should exactly match int numBuckets = table.getNumBuckets(); @@ -206,11 +245,16 @@ private boolean checkTable(Table table, List tableBucketPositions = getBucketPositions(table.getBucketCols(), table.getCols()); - ObjectPair, List> tableSortPositionsOrder = - getSortPositionsOrder(table.getSortCols(), table.getCols()); + List sortPositions = + getSortPositions(table.getSortCols(), table.getCols()); + List sortOrder = + getSortOrder(table.getSortCols(), table.getCols()); + List sortNullOrder = + getNullSortOrder(table.getSortCols(), table.getCols()); return bucketPositionsDest.equals(tableBucketPositions) && - sortPositionsDest.equals(tableSortPositionsOrder.getFirst()) && - sortOrderDest.equals(tableSortPositionsOrder.getSecond()); + sortPositionsDest.equals(sortPositions) && + sortOrderDest.equals(sortOrder) && + sortNullOrderDest.equals(sortNullOrder); } // Store the bucket path to bucket number mapping in the table scan operator. @@ -288,7 +332,8 @@ private int findColumnPosition(List cols, String colName) { private boolean validateSMBJoinKeys(SMBJoinDesc smbJoinDesc, List sourceTableBucketCols, List sourceTableSortCols, - List sortOrder) { + List sortOrder, + List sortNullOrder) { // The sort-merge join creates the output sorted and bucketized by the same columns. // This can be relaxed in the future if there is a requirement. if (!sourceTableBucketCols.equals(sourceTableSortCols)) { @@ -426,10 +471,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // also match for this to be converted to a map-only job. 
List bucketPositions = getBucketPositions(destTable.getBucketCols(), destTable.getCols()); - ObjectPair, List> sortOrderPositions = - getSortPositionsOrder(destTable.getSortCols(), destTable.getCols()); - List sortPositions = sortOrderPositions.getFirst(); - List sortOrder = sortOrderPositions.getSecond(); + List sortPositions = + getSortPositions(destTable.getSortCols(), destTable.getCols()); + List sortOrder = + getSortOrder(destTable.getSortCols(), destTable.getCols()); + List sortNullOrder = + getNullSortOrder(destTable.getSortCols(), destTable.getCols()); boolean useBucketSortPositions = true; // Only selects and filters are allowed @@ -464,7 +511,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } if (!validateSMBJoinKeys(smbJoinDesc, sourceTableBucketCols, - sourceTableSortCols, sortOrder)) { + sourceTableSortCols, sortOrder, sortNullOrder)) { return null; } @@ -539,7 +586,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } for (Partition partition : partitions) { if (!checkPartition(partition, newBucketPositions, newSortPositions, sortOrder, - numBucketsDestination)) { + sortNullOrder, numBucketsDestination)) { return null; } } @@ -550,7 +597,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } else { if (!checkTable(srcTable, newBucketPositions, newSortPositions, sortOrder, - numBucketsDestination)) { + sortNullOrder, numBucketsDestination)) { return null; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java index c38c6d7..2d5dbfd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java @@ -18,18 +18,16 @@ package org.apache.hadoop.hive.ql.optimizer; +import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.FIXED; + import java.util.ArrayList; import java.util.EnumSet; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.Stack; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -60,11 +58,11 @@ import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.plan.TezWork.VertexType; import org.apache.hadoop.hive.ql.stats.StatsUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Sets; -import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.FIXED; - public class ReduceSinkMapJoinProc implements NodeProcessor { private final static Logger LOG = LoggerFactory.getLogger(ReduceSinkMapJoinProc.class.getName()); @@ -347,11 +345,14 @@ public static Object processReduceSinkToHashJoin(ReduceSinkOperator parentRS, Ma Map> keyExprMap = mapJoinOp.getConf().getKeys(); List keyCols = keyExprMap.get(Byte.valueOf((byte) 0)); StringBuilder keyOrder = new StringBuilder(); + StringBuilder nullPosition = new StringBuilder(); for (ExprNodeDesc k: keyCols) { keyOrder.append("+"); + nullPosition.append("a"); } TableDesc keyTableDesc = PlanUtils.getReduceKeyTableDesc(PlanUtils - .getFieldSchemasFromColumnList(keyCols, "mapjoinkey"), keyOrder.toString()); + .getFieldSchemasFromColumnList(keyCols, "mapjoinkey"), 
keyOrder.toString(), + nullPosition.toString()); mapJoinOp.getConf().setKeyTableDesc(keyTableDesc); // let the dummy op be the parent of mapjoin op diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 27b0457..150cbad 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -52,6 +52,7 @@ import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; @@ -180,10 +181,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // Get the positions for partition, bucket and sort columns List bucketPositions = getBucketPositions(destTable.getBucketCols(), destTable.getCols()); - ObjectPair, List> sortOrderPositions = getSortPositionsOrder( - destTable.getSortCols(), destTable.getCols()); List sortPositions = null; List sortOrder = null; + List sortNullOrder = null; if (fsOp.getConf().getWriteType() == AcidUtils.Operation.UPDATE || fsOp.getConf().getWriteType() == AcidUtils.Operation.DELETE) { // When doing updates and deletes we always want to sort on the rowid because the ACID @@ -191,13 +191,16 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // ignore whatever comes from the table and enforce this sort order instead. sortPositions = Arrays.asList(0); sortOrder = Arrays.asList(1); // 1 means asc, could really use enum here in the thrift if + sortNullOrder = Arrays.asList(0); } else { - sortPositions = sortOrderPositions.getFirst(); - sortOrder = sortOrderPositions.getSecond(); + sortPositions = getSortPositions(destTable.getSortCols(), destTable.getCols()); + sortOrder = getSortOrders(destTable.getSortCols(), destTable.getCols()); + sortNullOrder = getSortNullOrders(destTable.getSortCols(), destTable.getCols()); } LOG.debug("Got sort order"); for (int i : sortPositions) LOG.debug("sort position " + i); for (int i : sortOrder) LOG.debug("sort order " + i); + for (int i : sortNullOrder) LOG.debug("sort null order " + i); List partitionPositions = getPartitionPositions(dpCtx, fsParent.getSchema()); List colInfos = fsParent.getSchema().getSignature(); ArrayList bucketColumns = getPositionsToExprNodes(bucketPositions, colInfos); @@ -214,7 +217,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, for (ColumnInfo ci : valColInfo) { newValueCols.add(new ExprNodeColumnDesc(ci)); } - ReduceSinkDesc rsConf = getReduceSinkDesc(partitionPositions, sortPositions, sortOrder, + ReduceSinkDesc rsConf = getReduceSinkDesc(partitionPositions, sortPositions, sortOrder, sortNullOrder, newValueCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType()); if (!bucketColumns.isEmpty()) { @@ -384,17 +387,19 @@ private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) { } public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, - List sortPositions, List sortOrder, ArrayList newValueCols, - ArrayList bucketColumns, int numBuckets, + List sortPositions, List sortOrder, List sortNullOrder, + ArrayList newValueCols, ArrayList bucketColumns, int numBuckets, Operator parent, 
AcidUtils.Operation writeType) { // Order of KEY columns // 1) Partition columns // 2) Bucket number column // 3) Sort columns + // 4) Null sort columns List keyColsPosInVal = Lists.newArrayList(); ArrayList newKeyCols = Lists.newArrayList(); List newSortOrder = Lists.newArrayList(); + List newSortNullOrder = Lists.newArrayList(); int numPartAndBuck = partitionPositions.size(); keyColsPosInVal.addAll(partitionPositions); @@ -425,6 +430,30 @@ public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, } } + // if partition and bucket columns are sorted in ascending order, by default + // nulls come first; otherwise nulls come last + Integer nullOrder = order == 1 ? 0 : 1; + if (sortNullOrder != null && !sortNullOrder.isEmpty()) { + if (sortNullOrder.get(0).intValue() == 0) { + nullOrder = 0; + } else { + nullOrder = 1; + } + } + for (int i = 0; i < numPartAndBuck; i++) { + newSortNullOrder.add(nullOrder); + } + newSortNullOrder.addAll(sortNullOrder); + + String nullOrderStr = ""; + for (Integer i : newSortNullOrder) { + if(i.intValue() == 0) { + nullOrderStr += "a"; + } else { + nullOrderStr += "z"; + } + } + ArrayList newPartCols = Lists.newArrayList(); // we will clone here as RS will update bucket column key with its @@ -451,9 +480,11 @@ public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, ReduceSinkOperator.class); if (parentRSOp != null && parseCtx.getQueryProperties().hasOuterOrderBy()) { String parentRSOpOrder = parentRSOp.getConf().getOrder(); + String parentRSOpNullOrder = parentRSOp.getConf().getNullPosition(); if (parentRSOpOrder != null && !parentRSOpOrder.isEmpty() && sortPositions.isEmpty()) { newKeyCols.addAll(parentRSOp.getConf().getKeyCols()); orderStr += parentRSOpOrder; + nullOrderStr += parentRSOpNullOrder; } } @@ -462,7 +493,7 @@ public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, // from Key and Value TableDesc List fields = PlanUtils.getFieldSchemasFromColumnList(newKeyCols, "reducesinkkey"); - TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, orderStr); + TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, orderStr, nullOrderStr); ArrayList outputKeyCols = Lists.newArrayList(); for (int i = 0; i < newKeyCols.size(); i++) { outputKeyCols.add("reducesinkkey" + i); @@ -490,27 +521,65 @@ public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, } /** - * Get the sort positions and sort order for the sort columns + * Get the sort positions for the sort columns * @param tabSortCols * @param tabCols * @return */ - private ObjectPair, List> getSortPositionsOrder(List tabSortCols, + private List getSortPositions(List tabSortCols, List tabCols) { List sortPositions = Lists.newArrayList(); - List sortOrders = Lists.newArrayList(); for (Order sortCol : tabSortCols) { int pos = 0; for (FieldSchema tabCol : tabCols) { if (sortCol.getCol().equals(tabCol.getName())) { sortPositions.add(pos); - sortOrders.add(sortCol.getOrder()); break; } pos++; } } - return new ObjectPair, List>(sortPositions, sortOrders); + return sortPositions; + } + + /** + * Get the sort order for the sort columns + * @param tabSortCols + * @param tabCols + * @return + */ + private List getSortOrders(List tabSortCols, + List tabCols) { + List sortOrders = Lists.newArrayList(); + for (Order sortCol : tabSortCols) { + for (FieldSchema tabCol : tabCols) { + if (sortCol.getCol().equals(tabCol.getName())) { + sortOrders.add(sortCol.getOrder()); + break; + } + } + } + return sortOrders; + } + + /** + * Get the null sort order for the sort columns + * @param 
tabSortCols + * @param tabCols + * @return + */ + private List getSortNullOrders(List tabSortCols, + List tabCols) { + List sortNullOrders = Lists.newArrayList(); + for (Order sortCol : tabSortCols) { + for (FieldSchema tabCol : tabCols) { + if (sortCol.getCol().equals(tabCol.getName())) { + sortNullOrders.add(sortCol.getNullOrder()); + break; + } + } + } + return sortNullOrders; } private ArrayList getPositionsToExprNodes(List pos, diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index 02db680..a95da0a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -33,6 +33,7 @@ import org.apache.calcite.rel.RelDistribution; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.RelFieldCollation.NullDirection; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.logical.LogicalTableScan; import org.apache.calcite.rel.type.RelDataType; @@ -165,7 +166,14 @@ public RelNode toRel(ToRelContext context) { else { direction = Direction.DESCENDING; } - collationList.add(new RelFieldCollation(i,direction)); + NullDirection nullDirection; + if (sortColumn.getNullOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_NULLS_FIRST) { + nullDirection = NullDirection.FIRST; + } + else { + nullDirection = NullDirection.LAST; + } + collationList.add(new RelFieldCollation(i,direction,nullDirection)); break; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index 3f2267d..b2a7ec3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -52,8 +52,6 @@ import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.ImmutableBitSet; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; @@ -64,6 +62,8 @@ import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.ParseDriver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Iterables; @@ -226,6 +226,24 @@ private void convertOrderLimitToASTNode(HiveSortLimit order) { ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING ? 
ASTBuilder .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); + ASTNode nullDirectionAST; + // Null direction + if (c.nullDirection == RelFieldCollation.NullDirection.FIRST) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); + directionAST.addChild(nullDirectionAST); + } else if (c.nullDirection == RelFieldCollation.NullDirection.LAST) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); + directionAST.addChild(nullDirectionAST); + } else { + // Default + if (c.getDirection() == RelFieldCollation.Direction.ASCENDING) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); + directionAST.addChild(nullDirectionAST); + } else { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); + directionAST.addChild(nullDirectionAST); + } + } // 3 Convert OB expr (OB Expr is usually an input ref except for top // level OB; top level OB will have RexCall kept in a map.) @@ -245,7 +263,7 @@ private void convertOrderLimitToASTNode(HiveSortLimit order) { } // 4 buildup the ob expr AST - directionAST.addChild(astCol); + nullDirectionAST.addChild(astCol); orderAst.addChild(directionAST); } hiveAST.order = orderAst; @@ -430,12 +448,31 @@ private ASTNode getPSpecAST(RexWindow window) { if (window.orderKeys != null && !window.orderKeys.isEmpty()) { oByAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY"); for (RexFieldCollation ok : window.orderKeys) { - ASTNode astNode = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? ASTBuilder + ASTNode directionAST = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? 
ASTBuilder .createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC") : ASTBuilder .createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC"); + ASTNode nullDirectionAST; + // Null direction + if (ok.getNullDirection() == RelFieldCollation.NullDirection.FIRST) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); + directionAST.addChild(nullDirectionAST); + } else if (ok.getNullDirection() == RelFieldCollation.NullDirection.LAST) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); + directionAST.addChild(nullDirectionAST); + } else { + // Default + if (ok.getDirection() == RelFieldCollation.Direction.ASCENDING) { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST"); + directionAST.addChild(nullDirectionAST); + } else { + nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST"); + directionAST.addChild(nullDirectionAST); + } + } ASTNode astCol = ok.left.accept(this); - astNode.addChild(astCol); - oByAst.addChild(astNode); + + nullDirectionAST.addChild(astCol); + oByAst.addChild(directionAST); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index b42e78f..a56408e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.RexVisitor; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.Schema; import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec; @@ -278,7 +279,18 @@ private PartitioningSpec getPSpec(RexWindow window) { OrderExpression exprSpec = new OrderExpression(); Order order = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? Order.ASC : Order.DESC; + NullOrder nullOrder; + if ( ok.getNullDirection() == RelFieldCollation.NullDirection.FIRST ) { + nullOrder = NullOrder.NULLS_FIRST; + } else if ( ok.getNullDirection() == RelFieldCollation.NullDirection.LAST ) { + nullOrder = NullOrder.NULLS_LAST; + } else { + // Default + nullOrder = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ?
+ NullOrder.NULLS_FIRST : NullOrder.NULLS_LAST; + } exprSpec.setOrder(order); + exprSpec.setNullOrder(nullOrder); ASTNode astNode = ok.left.accept(new RexVisitor(schema)); exprSpec.setExpression(astNode); oSpec.addExpression(exprSpec); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java index a0e374c..c2a7445 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java @@ -41,8 +41,6 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -65,8 +63,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; import org.apache.hadoop.hive.ql.parse.JoinCond; @@ -99,6 +97,8 @@ import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.UnionDesc; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -435,6 +435,7 @@ OpAttr visit(HiveSortLimit sortRel) throws SemanticException { Map obRefToCallMap = sortRel.getInputRefToCallMap(); List sortCols = new ArrayList(); StringBuilder order = new StringBuilder(); + StringBuilder nullOrder = new StringBuilder(); for (RelFieldCollation sortInfo : sortRel.getCollation().getFieldCollations()) { int sortColumnPos = sortInfo.getFieldIndex(); ColumnInfo columnInfo = new ColumnInfo(inputOp.getSchema().getSignature() @@ -447,6 +448,14 @@ OpAttr visit(HiveSortLimit sortRel) throws SemanticException { } else { order.append("+"); } + if (sortInfo.nullDirection == RelFieldCollation.NullDirection.FIRST) { + nullOrder.append("a"); + } else if (sortInfo.nullDirection == RelFieldCollation.NullDirection.LAST) { + nullOrder.append("z"); + } else { + // Default + nullOrder.append(sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING ? "z" : "a"); + } if (obRefToCallMap != null) { RexNode obExpr = obRefToCallMap.get(sortColumnPos); @@ -474,7 +483,7 @@ OpAttr visit(HiveSortLimit sortRel) throws SemanticException { // 1.b. Generate reduce sink and project operator resultOp = genReduceSinkAndBacktrackSelect(resultOp, sortCols.toArray(new ExprNodeDesc[sortCols.size()]), 0, new ArrayList(), - order.toString(), numReducers, Operation.NOT_ACID, hiveConf, keepColumns); + order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID, hiveConf, keepColumns); } // 2. 
If we need to generate limit @@ -626,6 +635,7 @@ private OpAttr genPTF(OpAttr inputOpAf, WindowingSpec wSpec) throws SemanticExce ArrayList keyCols = new ArrayList(); ArrayList partCols = new ArrayList(); StringBuilder order = new StringBuilder(); + StringBuilder nullOrder = new StringBuilder(); for (PartitionExpression partCol : wSpec.getQueryPartitionSpec().getExpressions()) { ExprNodeDesc partExpr = semanticAnalyzer.genExprNodeDesc(partCol.getExpression(), rr); @@ -633,6 +643,7 @@ private OpAttr genPTF(OpAttr inputOpAf, WindowingSpec wSpec) throws SemanticExce keyCols.add(partExpr); partCols.add(partExpr); order.append('+'); + nullOrder.append('a'); } } @@ -640,19 +651,22 @@ private OpAttr genPTF(OpAttr inputOpAf, WindowingSpec wSpec) throws SemanticExce for (OrderExpression orderCol : wSpec.getQueryOrderSpec().getExpressions()) { ExprNodeDesc orderExpr = semanticAnalyzer.genExprNodeDesc(orderCol.getExpression(), rr); char orderChar = orderCol.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-'; + char nullOrderChar = orderCol.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z'; int index = ExprNodeDescUtils.indexOf(orderExpr, keyCols); if (index >= 0) { order.setCharAt(index, orderChar); + nullOrder.setCharAt(index, nullOrderChar); continue; } keyCols.add(orderExpr); order.append(orderChar); + nullOrder.append(nullOrderChar); } } SelectOperator selectOp = genReduceSinkAndBacktrackSelect(input, keyCols.toArray(new ExprNodeDesc[keyCols.size()]), 0, partCols, - order.toString(), -1, Operation.NOT_ACID, hiveConf); + order.toString(), nullOrder.toString(), -1, Operation.NOT_ACID, hiveConf); // 2. Finally create PTF PTFTranslator translator = new PTFTranslator(); @@ -677,14 +691,14 @@ private OpAttr genPTF(OpAttr inputOpAf, WindowingSpec wSpec) throws SemanticExce private static SelectOperator genReduceSinkAndBacktrackSelect(Operator input, ExprNodeDesc[] keys, int tag, ArrayList partitionCols, String order, - int numReducers, Operation acidOperation, HiveConf hiveConf) + String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf) throws SemanticException { - return genReduceSinkAndBacktrackSelect(input, keys, tag, partitionCols, order, + return genReduceSinkAndBacktrackSelect(input, keys, tag, partitionCols, order, nullOrder, numReducers, acidOperation, hiveConf, input.getSchema().getColumnNames()); } private static SelectOperator genReduceSinkAndBacktrackSelect(Operator input, - ExprNodeDesc[] keys, int tag, ArrayList partitionCols, String order, + ExprNodeDesc[] keys, int tag, ArrayList partitionCols, String order, String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf, List keepColNames) throws SemanticException { // 1. Generate RS operator @@ -715,7 +729,8 @@ private static SelectOperator genReduceSinkAndBacktrackSelect(Operator input, "In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none"); } // 1.2 Now generate RS operator - ReduceSinkOperator rsOp = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, numReducers, acidOperation, hiveConf); + ReduceSinkOperator rsOp = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, + nullOrder, numReducers, acidOperation, hiveConf); // 2. 
Generate backtrack Select operator Map descriptors = buildBacktrackFromReduceSink(keepColNames, @@ -737,13 +752,13 @@ private static SelectOperator genReduceSinkAndBacktrackSelect(Operator input, private static ReduceSinkOperator genReduceSink(Operator input, String tableAlias, ExprNodeDesc[] keys, int tag, int numReducers, Operation acidOperation, HiveConf hiveConf) throws SemanticException { - return genReduceSink(input, tableAlias, keys, tag, new ArrayList(), "", numReducers, + return genReduceSink(input, tableAlias, keys, tag, new ArrayList(), "", "", numReducers, acidOperation, hiveConf); } @SuppressWarnings({ "rawtypes", "unchecked" }) private static ReduceSinkOperator genReduceSink(Operator input, String tableAlias, ExprNodeDesc[] keys, int tag, - ArrayList partitionCols, String order, int numReducers, + ArrayList partitionCols, String order, String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf) throws SemanticException { Operator dummy = Operator.createDummy(); // dummy for backtracking dummy.setParentOperators(Arrays.asList(input)); @@ -818,7 +833,7 @@ private static ReduceSinkOperator genReduceSink(Operator input, String tableA reduceKeys.size(), numReducers, acidOperation); } else { rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, - partitionCols, order, numReducers, acidOperation); + partitionCols, order, nullOrder, numReducers, acidOperation); } ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild( diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index 638b91e..fdd9202 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -313,7 +313,8 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR pRS.getConf().setNumDistributionKeys(cRS.getConf().getNumDistributionKeys()); List fields = PlanUtils.getFieldSchemasFromColumnList(pRS.getConf() .getKeyCols(), "reducesinkkey"); - TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, pRS.getConf().getOrder()); + TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, pRS.getConf().getOrder(), + pRS.getConf().getNullPosition()); ArrayList outputKeyCols = Lists.newArrayList(); for (int i = 0; i < fields.size(); i++) { outputKeyCols.add(fields.get(i).getName()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java index f48fac1..237c70a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java @@ -18,10 +18,13 @@ package org.apache.hadoop.hive.ql.optimizer.spark; -import com.google.common.base.Preconditions; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; @@ -52,13 +55,10 @@ import org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc; import 
org.apache.hadoop.hive.ql.plan.SparkWork; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Stack; +import com.google.common.base.Preconditions; public class SparkReduceSinkMapJoinProc implements NodeProcessor { @@ -209,11 +209,14 @@ public Object process(Node nd, Stack stack, Map> keyExprMap = mapJoinOp.getConf().getKeys(); List keyCols = keyExprMap.get(Byte.valueOf((byte) 0)); StringBuilder keyOrder = new StringBuilder(); + StringBuilder nullPosition = new StringBuilder(); for (int i = 0; i < keyCols.size(); i++) { keyOrder.append("+"); + nullPosition.append("a"); } TableDesc keyTableDesc = PlanUtils.getReduceKeyTableDesc(PlanUtils - .getFieldSchemasFromColumnList(keyCols, "mapjoinkey"), keyOrder.toString()); + .getFieldSchemasFromColumnList(keyCols, "mapjoinkey"), keyOrder.toString(), + nullPosition.toString()); mapJoinOp.getConf().setKeyTableDesc(keyTableDesc); // let the dummy op be the parent of mapjoin op diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index af1ee20..1638784 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -105,6 +105,8 @@ public static int HIVE_COLUMN_ORDER_ASC = 1; public static int HIVE_COLUMN_ORDER_DESC = 0; + public static int HIVE_COLUMN_NULLS_FIRST = 0; + public static int HIVE_COLUMN_NULLS_LAST = 1; /** * ReadEntitites that are passed to the hooks. @@ -653,11 +655,23 @@ private static String spliceString(String str, int i, int length, String replace for (int i = 0; i < numCh; i++) { ASTNode child = (ASTNode) ast.getChild(i); if (child.getToken().getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { - colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()).toLowerCase(), - HIVE_COLUMN_ORDER_ASC)); + child = (ASTNode) child.getChild(0); + if (child.getToken().getType() == HiveParser.TOK_NULLS_FIRST) { + colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()).toLowerCase(), + HIVE_COLUMN_ORDER_ASC, HIVE_COLUMN_NULLS_FIRST)); + } else { + colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()).toLowerCase(), + HIVE_COLUMN_ORDER_ASC, HIVE_COLUMN_NULLS_LAST)); + } } else { - colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()).toLowerCase(), - HIVE_COLUMN_ORDER_DESC)); + child = (ASTNode) child.getChild(0); + if (child.getToken().getType() == HiveParser.TOK_NULLS_LAST) { + colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()).toLowerCase(), + HIVE_COLUMN_ORDER_DESC, HIVE_COLUMN_NULLS_LAST)); + } else { + colList.add(new Order(unescapeIdentifier(child.getChild(0).getText()).toLowerCase(), + HIVE_COLUMN_ORDER_DESC, HIVE_COLUMN_NULLS_FIRST)); + } } } return colList; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index e2d404b..a989c64 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -2341,6 +2341,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException List obASTExprLst = obAST.getChildren(); ASTNode obASTExpr; + ASTNode nullObASTExpr; List> vcASTTypePairs = new 
ArrayList>(); RowResolver inputRR = relToHiveRR.get(srcRel); RowResolver outputRR = new RowResolver(); @@ -2353,9 +2354,11 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException for (int i = 0; i < obASTExprLst.size(); i++) { // 2.1 Convert AST Expr to ExprNode obASTExpr = (ASTNode) obASTExprLst.get(i); + nullObASTExpr = (ASTNode) obASTExpr.getChild(0); + ASTNode ref = (ASTNode) nullObASTExpr.getChild(0); Map astToExprNDescMap = TypeCheckProcFactory.genExprNode( obASTExpr, new TypeCheckCtx(inputRR)); - ExprNodeDesc obExprNDesc = astToExprNDescMap.get(obASTExpr.getChild(0)); + ExprNodeDesc obExprNDesc = astToExprNDescMap.get(ref); if (obExprNDesc == null) throw new SemanticException("Invalid order by expression: " + obASTExpr.toString()); @@ -2370,18 +2373,26 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException } else { fieldIndex = srcRelRecordSz + newVCLst.size(); newVCLst.add(rnd); - vcASTTypePairs.add(new Pair((ASTNode) obASTExpr.getChild(0), - obExprNDesc.getTypeInfo())); + vcASTTypePairs.add(new Pair(ref, obExprNDesc.getTypeInfo())); } // 2.4 Determine the Direction of order by - org.apache.calcite.rel.RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; + RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; if (obASTExpr.getType() == HiveParser.TOK_TABSORTCOLNAMEASC) { order = RelFieldCollation.Direction.ASCENDING; } + RelFieldCollation.NullDirection nullOrder; + if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_FIRST) { + nullOrder = RelFieldCollation.NullDirection.FIRST; + } else if (nullObASTExpr.getType() == HiveParser.TOK_NULLS_LAST) { + nullOrder = RelFieldCollation.NullDirection.LAST; + } else { + throw new SemanticException( + "Unexpected null ordering option: " + nullObASTExpr.getType()); + } // 2.5 Add to field collations - fieldCollations.add(new RelFieldCollation(fieldIndex, order)); + fieldCollations.add(new RelFieldCollation(fieldIndex, order, nullOrder)); } // 3. 
Add Child Project Rel if needed, Generate Output RR, input Sel Rel diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 4c4470b..ac580f7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -52,6 +52,8 @@ KW_EXISTS : 'EXISTS'; KW_ASC : 'ASC'; KW_DESC : 'DESC'; +KW_NULLS : 'NULLS'; +KW_LAST : 'LAST'; KW_ORDER : 'ORDER'; KW_GROUP : 'GROUP'; KW_BY : 'BY'; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 5f14c6b..42f4090 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -81,6 +81,8 @@ TOK_GROUPING_SETS; TOK_GROUPING_SETS_EXPRESSION; TOK_HAVING; TOK_ORDERBY; +TOK_NULLS_FIRST; +TOK_NULLS_LAST; TOK_CLUSTERBY; TOK_DISTRIBUTEBY; TOK_SORTBY; @@ -401,6 +403,8 @@ import org.apache.hadoop.hive.conf.HiveConf; xlateMap.put("KW_ASC", "ASC"); xlateMap.put("KW_DESC", "DESC"); + xlateMap.put("KW_NULLS", "NULLS"); + xlateMap.put("KW_LAST", "LAST"); xlateMap.put("KW_ORDER", "ORDER"); xlateMap.put("KW_BY", "BY"); xlateMap.put("KW_GROUP", "GROUP"); @@ -2005,13 +2009,34 @@ skewedValueLocationElement skewedColumnValue | skewedColumnValuePair ; - + +orderSpecification +@init { pushMsg("order specification", state); } +@after { popMsg(state); } + : KW_ASC | KW_DESC ; + +nullOrdering +@init { pushMsg("nulls ordering", state); } +@after { popMsg(state); } + : KW_NULLS KW_FIRST -> ^(TOK_NULLS_FIRST) + | KW_NULLS KW_LAST -> ^(TOK_NULLS_LAST) + ; + columnNameOrder @init { pushMsg("column name order", state); } @after { popMsg(state); } - : identifier (asc=KW_ASC | desc=KW_DESC)? - -> {$desc == null}? ^(TOK_TABSORTCOLNAMEASC identifier) - -> ^(TOK_TABSORTCOLNAMEDESC identifier) + : identifier orderSpec=orderSpecification? nullSpec=nullOrdering? + -> {$orderSpec.tree == null && $nullSpec.tree == null}? + ^(TOK_TABSORTCOLNAMEASC ^(TOK_NULLS_FIRST identifier)) + -> {$orderSpec.tree == null}? + ^(TOK_TABSORTCOLNAMEASC ^($nullSpec identifier)) + -> {$nullSpec.tree == null && $orderSpec.tree.getType()==HiveParser.KW_ASC}? + ^(TOK_TABSORTCOLNAMEASC ^(TOK_NULLS_FIRST identifier)) + -> {$nullSpec.tree == null && $orderSpec.tree.getType()==HiveParser.KW_DESC}? + ^(TOK_TABSORTCOLNAMEDESC ^(TOK_NULLS_LAST identifier)) + -> {$orderSpec.tree.getType()==HiveParser.KW_ASC}? + ^(TOK_TABSORTCOLNAMEASC ^($nullSpec identifier)) + -> ^(TOK_TABSORTCOLNAMEDESC ^($nullSpec identifier)) ; columnNameCommentList @@ -2030,9 +2055,18 @@ columnNameComment columnRefOrder @init { pushMsg("column order", state); } @after { popMsg(state); } - : expression (asc=KW_ASC | desc=KW_DESC)? - -> {$desc == null}? ^(TOK_TABSORTCOLNAMEASC expression) - -> ^(TOK_TABSORTCOLNAMEDESC expression) + : expression orderSpec=orderSpecification? nullSpec=nullOrdering? + -> {$orderSpec.tree == null && $nullSpec.tree == null}? + ^(TOK_TABSORTCOLNAMEASC ^(TOK_NULLS_FIRST expression)) + -> {$orderSpec.tree == null}? + ^(TOK_TABSORTCOLNAMEASC ^($nullSpec expression)) + -> {$nullSpec.tree == null && $orderSpec.tree.getType()==HiveParser.KW_ASC}? + ^(TOK_TABSORTCOLNAMEASC ^(TOK_NULLS_FIRST expression)) + -> {$nullSpec.tree == null && $orderSpec.tree.getType()==HiveParser.KW_DESC}? + ^(TOK_TABSORTCOLNAMEDESC ^(TOK_NULLS_LAST expression)) + -> {$orderSpec.tree.getType()==HiveParser.KW_ASC}? 
+ ^(TOK_TABSORTCOLNAMEASC ^($nullSpec expression)) + -> ^(TOK_TABSORTCOLNAMEDESC ^($nullSpec expression)) ; columnNameType diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 61bd10c..a192fa7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -653,8 +653,8 @@ nonReserved | KW_ENABLE | KW_ESCAPED | KW_EXCLUSIVE | KW_EXPLAIN | KW_EXPORT | KW_FIELDS | KW_FILE | KW_FILEFORMAT | KW_FIRST | KW_FORMAT | KW_FORMATTED | KW_FUNCTIONS | KW_HOLD_DDLTIME | KW_HOUR | KW_IDXPROPERTIES | KW_IGNORE | KW_INDEX | KW_INDEXES | KW_INPATH | KW_INPUTDRIVER | KW_INPUTFORMAT | KW_ITEMS | KW_JAR - | KW_KEYS | KW_KEY_TYPE | KW_LIMIT | KW_OFFSET | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG - | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_OFFLINE + | KW_KEYS | KW_KEY_TYPE | KW_LAST | KW_LIMIT | KW_OFFSET | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG + | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_NULLS | KW_OFFLINE | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java index a8980eb..ecf3cfc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java @@ -508,18 +508,27 @@ public String toString() DESC; } + public static enum NullOrder + { + NULLS_FIRST, + NULLS_LAST; + } + public static class OrderExpression extends PartitionExpression { Order order; + NullOrder nullOrder; public OrderExpression() { order = Order.ASC; + nullOrder = NullOrder.NULLS_FIRST; } public OrderExpression(PartitionExpression peSpec) { super(peSpec); order = Order.ASC; + nullOrder = NullOrder.NULLS_FIRST; } public Order getOrder() @@ -532,12 +541,23 @@ public void setOrder(Order order) this.order = order; } + public NullOrder getNullOrder() + { + return nullOrder; + } + + public void setNullOrder(NullOrder nullOrder) + { + this.nullOrder = nullOrder; + } + @Override public int hashCode() { final int prime = 31; int result = super.hashCode(); result = prime * result + ((order == null) ? 0 : order.hashCode()); + result = prime * result + ((nullOrder == null) ? 
0 : nullOrder.hashCode()); return result; } @@ -557,13 +577,16 @@ public boolean equals(Object obj) if (order != other.order) { return false; } + if (nullOrder != other.nullOrder) { + return false; + } return true; } @Override public String toString() { - return String.format("%s %s", super.toString(), order); + return String.format("%s %s %s", super.toString(), order, nullOrder); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java index 2370ec0..9921b21 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java @@ -499,6 +499,7 @@ private OrderExpressionDef translate(ShapeDetails inpShape, throws SemanticException { OrderExpressionDef oexpDef = new OrderExpressionDef(); oexpDef.setOrder(oExpr.getOrder()); + oexpDef.setNullOrder(oExpr.getNullOrder()); try { PTFExpressionDef expDef = buildExpressionDef(inpShape, oExpr.getExpression()); oexpDef.setExpressionTreeString(expDef.getExpressionTreeString()); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index ba1945f..9898995 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -6080,6 +6080,7 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb, ArrayList partnCols = new ArrayList(); ArrayList sortCols = new ArrayList(); ArrayList sortOrders = new ArrayList(); + ArrayList nullPositions = new ArrayList(); boolean multiFileSpray = false; int numFiles = 1; int totalFiles = 1; @@ -6097,6 +6098,7 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb, (dest_tab.getSortCols().size() > 0)) { sortCols = getSortCols(dest, qb, dest_tab, table_desc, input, true); sortOrders = getSortOrders(dest, qb, dest_tab, input); + nullPositions = getNullPositions(dest, qb, dest_tab, input); if (!enforceBucketing && !dest_tab.isIndexTable()) { throw new SemanticException(ErrorMsg.TBL_SORTED_NOT_BUCKETED.getErrorCodedMsg(dest_tab.getCompleteName())); } else { @@ -6134,8 +6136,12 @@ private Operator genBucketingSortingDest(String dest, Operator input, QB qb, for (int sortOrder : sortOrders) { order.append(sortOrder == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC ? '+' : '-'); } - input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), maxReducers, - (AcidUtils.isAcidTable(dest_tab) ? getAcidType() : AcidUtils.Operation.NOT_ACID)); + StringBuilder nullPosition = new StringBuilder(); + for (int pos : nullPositions) { + nullPosition.append(pos == BaseSemanticAnalyzer.HIVE_COLUMN_NULLS_FIRST ? 'a' : 'z'); + } + input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), nullPosition.toString(), + maxReducers, (AcidUtils.isAcidTable(dest_tab) ? 
getAcidType() : AcidUtils.Operation.NOT_ACID)); reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator)input.getParentOperators().get(0)); ctx.setMultiFileSpray(multiFileSpray); ctx.setNumFiles(numFiles); @@ -7092,6 +7098,23 @@ private Operator genLimitMapRedPlan(String dest, QB qb, Operator input, return orders; } + private ArrayList getNullPositions(String dest, QB qb, Table tab, Operator input) + throws SemanticException { + List tabSortCols = tab.getSortCols(); + List tabCols = tab.getCols(); + + ArrayList orders = new ArrayList(); + for (Order sortCol : tabSortCols) { + for (FieldSchema tabCol : tabCols) { + if (sortCol.getCol().equals(tabCol.getName())) { + orders.add(sortCol.getNullOrder()); + break; + } + } + } + return orders; + } + private Operator genReduceSinkPlan(String dest, QB qb, Operator input, int numReducers, boolean hasOrderBy) throws SemanticException { @@ -7132,6 +7155,7 @@ private Operator genReduceSinkPlan(String dest, QB qb, Operator input, } ArrayList sortCols = new ArrayList(); StringBuilder order = new StringBuilder(); + StringBuilder nullPosition = new StringBuilder(); if (sortExprs != null) { int ccount = sortExprs.getChildCount(); for (int i = 0; i < ccount; ++i) { @@ -7141,20 +7165,40 @@ private Operator genReduceSinkPlan(String dest, QB qb, Operator input, // SortBy ASC order.append("+"); cl = (ASTNode) cl.getChild(0); + if (cl.getType() == HiveParser.TOK_NULLS_FIRST) { + nullPosition.append("a"); + } else if (cl.getType() == HiveParser.TOK_NULLS_LAST) { + nullPosition.append("z"); + } else { + throw new SemanticException( + "Unexpected null ordering option: " + cl.getType()); + } + cl = (ASTNode) cl.getChild(0); } else if (cl.getType() == HiveParser.TOK_TABSORTCOLNAMEDESC) { // SortBy DESC order.append("-"); cl = (ASTNode) cl.getChild(0); + if (cl.getType() == HiveParser.TOK_NULLS_FIRST) { + nullPosition.append("a"); + } else if (cl.getType() == HiveParser.TOK_NULLS_LAST) { + nullPosition.append("z"); + } else { + throw new SemanticException( + "Unexpected null ordering option: " + cl.getType()); + } + cl = (ASTNode) cl.getChild(0); } else { // ClusterBy order.append("+"); + nullPosition.append("a"); } ExprNodeDesc exprNode = genExprNodeDesc(cl, inputRR); sortCols.add(exprNode); } } Operator result = genReduceSinkPlan( - input, partCols, sortCols, order.toString(), numReducers, Operation.NOT_ACID); + input, partCols, sortCols, order.toString(), nullPosition.toString(), + numReducers, Operation.NOT_ACID); if (result.getParentOperators().size() == 1 && result.getParentOperators().get(0) instanceof ReduceSinkOperator) { ((ReduceSinkOperator) result.getParentOperators().get(0)) @@ -7166,7 +7210,8 @@ private Operator genReduceSinkPlan(String dest, QB qb, Operator input, @SuppressWarnings("nls") private Operator genReduceSinkPlan(Operator input, ArrayList partitionCols, ArrayList sortCols, - String sortOrder, int numReducers, AcidUtils.Operation acidOp) throws SemanticException { + String sortOrder, String nullPosition, int numReducers, AcidUtils.Operation acidOp) + throws SemanticException { RowResolver inputRR = opParseCtx.get(input).getRowResolver(); @@ -7233,7 +7278,7 @@ private Operator genReduceSinkPlan(Operator input, dummy.setParentOperators(null); ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(sortCols, valueCols, outputColumns, - false, -1, partitionCols, sortOrder, numReducers, acidOp); + false, -1, partitionCols, sortOrder, nullPosition, numReducers, acidOp); Operator interim = 
putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc, new RowSchema(rsRR.getColumnInfos()), input), rsRR); @@ -11262,7 +11307,7 @@ private void processPositionAlias(ASTNode ast) throws SemanticException { } } for (int child_pos = 0; child_pos < orderbyNode.getChildCount(); ++child_pos) { - ASTNode colNode = (ASTNode) orderbyNode.getChild(child_pos); + ASTNode colNode = (ASTNode) orderbyNode.getChild(child_pos).getChild(0); ASTNode node = (ASTNode) colNode.getChild(0); if (node.getToken().getType() == HiveParser.Number) { if( isByPos ) { @@ -11594,13 +11639,20 @@ private OrderSpec processOrderSpec(ASTNode sortNode) { int exprCnt = sortNode.getChildCount(); for(int i=0; i < exprCnt; i++) { OrderExpression exprSpec = new OrderExpression(); - exprSpec.setExpression((ASTNode) sortNode.getChild(i).getChild(0)); - if ( sortNode.getChild(i).getType() == HiveParser.TOK_TABSORTCOLNAMEASC ) { + ASTNode orderSpec = (ASTNode) sortNode.getChild(i); + ASTNode nullOrderSpec = (ASTNode) orderSpec.getChild(0); + exprSpec.setExpression((ASTNode) nullOrderSpec.getChild(0)); + if ( orderSpec.getType() == HiveParser.TOK_TABSORTCOLNAMEASC ) { exprSpec.setOrder(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.ASC); } else { exprSpec.setOrder(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order.DESC); } + if ( nullOrderSpec.getType() == HiveParser.TOK_NULLS_FIRST ) { + exprSpec.setNullOrder(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder.NULLS_FIRST); + } else { + exprSpec.setNullOrder(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder.NULLS_LAST); + } oSpec.addExpression(exprSpec); } return oSpec; @@ -11938,7 +11990,8 @@ void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef, RowResolver inputRR, ArrayList partCols, ArrayList orderCols, - StringBuilder orderString) throws SemanticException { + StringBuilder orderString, + StringBuilder nullPositionString) throws SemanticException { List partColList = tabDef.getPartition().getExpressions(); @@ -11948,6 +12001,7 @@ void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef, partCols.add(exprNode); orderCols.add(exprNode); orderString.append('+'); + nullPositionString.append('a'); } } @@ -11962,13 +12016,16 @@ void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef, for (int i = 0; i < orderColList.size(); i++) { OrderExpressionDef colDef = orderColList.get(i); char orderChar = colDef.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-'; + char nullPositionChar = colDef.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z'; int index = ExprNodeDescUtils.indexOf(colDef.getExprNode(), orderCols); if (index >= 0) { orderString.setCharAt(index, orderChar); + nullPositionString.setCharAt(index, nullPositionChar); continue; } orderCols.add(colDef.getExprNode()); orderString.append(orderChar); + nullPositionString.append(nullPositionChar); } } @@ -12011,6 +12068,7 @@ private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operato ArrayList partCols = new ArrayList(); ArrayList orderCols = new ArrayList(); StringBuilder orderString = new StringBuilder(); + StringBuilder nullPositionString = new StringBuilder(); /* * Use the input RR of TableScanOperator in case there is no map-side @@ -12018,8 +12076,9 @@ private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operato * If the parent of ReduceSinkOperator is PTFOperator, use it's * output RR. 
*/ - buildPTFReduceSinkDetails(tabDef, rr, partCols, orderCols, orderString); - input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), -1, Operation.NOT_ACID); + buildPTFReduceSinkDetails(tabDef, rr, partCols, orderCols, orderString, nullPositionString); + input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), + nullPositionString.toString(), -1, Operation.NOT_ACID); } /* @@ -12077,6 +12136,7 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec, ArrayList partCols = new ArrayList(); ArrayList orderCols = new ArrayList(); StringBuilder order = new StringBuilder(); + StringBuilder nullPosition = new StringBuilder(); for (PartitionExpression partCol : spec.getQueryPartitionSpec().getExpressions()) { ExprNodeDesc partExpr = genExprNodeDesc(partCol.getExpression(), inputRR); @@ -12084,6 +12144,7 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec, partCols.add(partExpr); orderCols.add(partExpr); order.append('+'); + nullPosition.append('a'); } } @@ -12091,17 +12152,21 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec, for (OrderExpression orderCol : spec.getQueryOrderSpec().getExpressions()) { ExprNodeDesc orderExpr = genExprNodeDesc(orderCol.getExpression(), inputRR); char orderChar = orderCol.getOrder() == PTFInvocationSpec.Order.ASC ? '+' : '-'; + char nullPositionChar = orderCol.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST ? 'a' : 'z'; int index = ExprNodeDescUtils.indexOf(orderExpr, orderCols); if (index >= 0) { order.setCharAt(index, orderChar); + nullPosition.setCharAt(index, nullPositionChar); continue; } orderCols.add(genExprNodeDesc(orderCol.getExpression(), inputRR)); order.append(orderChar); + nullPosition.append(nullPositionChar); } } - return genReduceSinkPlan(input, partCols, orderCols, order.toString(), -1, Operation.NOT_ACID); + return genReduceSinkPlan(input, partCols, orderCols, order.toString(), nullPosition.toString(), + -1, Operation.NOT_ACID); } public static ArrayList parseSelect(String selectExprStr) diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 598520c..45dfd27 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -735,6 +735,8 @@ public ColumnExprProcessor getColumnExprProcessor() { windowingTokens.add(HiveParser.KW_CURRENT); windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEASC); windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEDESC); + windowingTokens.add(HiveParser.TOK_NULLS_FIRST); + windowingTokens.add(HiveParser.TOK_NULLS_LAST); } protected static boolean isRedundantConversionFunction(ASTNode expr, diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index 04d26f3..3dfd12e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -23,15 +23,12 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; @@ -39,7 +36,6 @@ 
import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -66,8 +62,8 @@ import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -76,6 +72,8 @@ import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.TextInputFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * PlanUtils. @@ -389,7 +387,7 @@ public static TableDesc getDefaultTableDesc(String separatorCode) { * Generate the table descriptor for reduce key. */ public static TableDesc getReduceKeyTableDesc(List fieldSchemas, - String order) { + String order, String nullPosition) { return new TableDesc( SequenceFileInputFormat.class, SequenceFileOutputFormat.class, Utilities.makeProperties(serdeConstants.LIST_COLUMNS, MetaStoreUtils @@ -397,6 +395,7 @@ public static TableDesc getReduceKeyTableDesc(List fieldSchemas, serdeConstants.LIST_COLUMN_TYPES, MetaStoreUtils .getColumnTypesFromFieldSchema(fieldSchemas), serdeConstants.SERIALIZATION_SORT_ORDER, order, + serdeConstants.SERIALIZATION_NULL_POSITION, nullPosition, serdeConstants.SERIALIZATION_LIB, BinarySortableSerDe.class.getName())); } @@ -411,8 +410,10 @@ public static TableDesc getMapJoinKeyTableDesc(Configuration conf, // be broadcast (instead of partitioned). As a consequence we use // a different SerDe than in the MR mapjoin case. StringBuilder order = new StringBuilder(); + StringBuilder nullPosition = new StringBuilder(); for (FieldSchema f: fieldSchemas) { order.append("+"); + nullPosition.append("a"); } return new TableDesc( SequenceFileInputFormat.class, SequenceFileOutputFormat.class, @@ -421,6 +422,7 @@ public static TableDesc getMapJoinKeyTableDesc(Configuration conf, serdeConstants.LIST_COLUMN_TYPES, MetaStoreUtils .getColumnTypesFromFieldSchema(fieldSchemas), serdeConstants.SERIALIZATION_SORT_ORDER, order.toString(), + serdeConstants.SERIALIZATION_NULL_POSITION, nullPosition.toString(), serdeConstants.SERIALIZATION_LIB, BinarySortableSerDe.class.getName())); } else { return new TableDesc(SequenceFileInputFormat.class, @@ -606,15 +608,15 @@ public int compare(FieldSchema o1, FieldSchema o2) { public static ReduceSinkDesc getReduceSinkDesc( ArrayList keyCols, ArrayList valueCols, List outputColumnNames, boolean includeKeyCols, int tag, - ArrayList partitionCols, String order, int numReducers, - AcidUtils.Operation writeType) { + ArrayList partitionCols, String order, String nullPosition, + int numReducers, AcidUtils.Operation writeType) { return getReduceSinkDesc(keyCols, keyCols.size(), valueCols, new ArrayList>(), includeKeyCols ? outputColumnNames.subList(0, keyCols.size()) : new ArrayList(), includeKeyCols ? 
outputColumnNames.subList(keyCols.size(), outputColumnNames.size()) : outputColumnNames, - includeKeyCols, tag, partitionCols, order, numReducers, writeType); + includeKeyCols, tag, partitionCols, order, nullPosition, numReducers, writeType); } /** @@ -651,8 +653,8 @@ public static ReduceSinkDesc getReduceSinkDesc( List outputKeyColumnNames, List outputValueColumnNames, boolean includeKeyCols, int tag, - ArrayList partitionCols, String order, int numReducers, - AcidUtils.Operation writeType) { + ArrayList partitionCols, String order, String nullPosition, + int numReducers, AcidUtils.Operation writeType) { TableDesc keyTable = null; TableDesc valueTable = null; ArrayList outputKeyCols = new ArrayList(); @@ -663,11 +665,14 @@ public static ReduceSinkDesc getReduceSinkDesc( if (order.length() < outputKeyColumnNames.size()) { order = order + "+"; } - keyTable = getReduceKeyTableDesc(keySchema, order); + if (nullPosition.length() < outputKeyColumnNames.size()) { + nullPosition = nullPosition + "a"; + } + keyTable = getReduceKeyTableDesc(keySchema, order, nullPosition); outputKeyCols.addAll(outputKeyColumnNames); } else { keyTable = getReduceKeyTableDesc(getFieldSchemasFromColumnList( - keyCols, "reducesinkkey"),order); + keyCols, "reducesinkkey"), order, nullPosition); for (int i = 0; i < keyCols.size(); i++) { outputKeyCols.add("reducesinkkey" + i); } @@ -764,12 +769,14 @@ public static ReduceSinkDesc getReduceSinkDesc( } StringBuilder order = new StringBuilder(); + StringBuilder nullPosition = new StringBuilder(); for (int i = 0; i < keyCols.size(); i++) { order.append("+"); + nullPosition.append("a"); } return getReduceSinkDesc(keyCols, numKeys, valueCols, distinctColIndices, outputKeyColumnNames, outputValueColumnNames, includeKey, tag, - partitionCols, order.toString(), numReducers, writeType); + partitionCols, order.toString(), nullPosition.toString(), numReducers, writeType); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index 41d9ffe..a27d6b3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -22,10 +22,10 @@ import java.util.EnumSet; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @@ -380,6 +380,24 @@ public boolean isOrdering() { return false; } + /** + * Returns the null order in the key columns. + * + * @return null, which means default for all key columns, or a String + * of the same length as key columns, that consists of only "a" + * (null first) and "z" (null last). 
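For reference, a minimal standalone sketch (not part of the patch; the strings and column count are invented for illustration) of how the per-column sort-order string ('+'/'-') and the null-position string ('a'/'z') described here combine into an effective ordering per reduce-sink key column:

public class NullPositionExample {
  public static void main(String[] args) {
    String order = "+-";        // '+' = ascending, '-' = descending (one char per key column)
    String nullPosition = "az"; // 'a' = NULLs first, 'z' = NULLs last (same length as order)
    for (int i = 0; i < order.length(); i++) {
      String direction = order.charAt(i) == '+' ? "ASC" : "DESC";
      String nulls = nullPosition.charAt(i) == 'a' ? "NULLS FIRST" : "NULLS LAST";
      System.out.println("key column " + i + ": " + direction + " " + nulls);
    }
  }
}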
+ */ + public String getNullPosition() { + return keySerializeInfo.getProperties().getProperty( + org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_POSITION); + } + + public void setNullPosition(String nullPositionStr) { + keySerializeInfo.getProperties().setProperty( + org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_POSITION, + nullPositionStr); + } + public List> getDistinctColumnIndices() { return distinctColumnIndices; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/OrderExpressionDef.java ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/OrderExpressionDef.java index e367d13..827911a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/OrderExpressionDef.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/OrderExpressionDef.java @@ -18,15 +18,18 @@ package org.apache.hadoop.hive.ql.plan.ptf; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order; public class OrderExpressionDef extends PTFExpressionDef { private Order order; + private NullOrder nullOrder; public OrderExpressionDef() {} public OrderExpressionDef(PTFExpressionDef e) { super(e); order = Order.ASC; + nullOrder = NullOrder.NULLS_FIRST; } public Order getOrder() { @@ -36,5 +39,13 @@ public Order getOrder() { public void setOrder(Order order) { this.order = order; } -} + public NullOrder getNullOrder() { + return nullOrder; + } + + public void setNullOrder(NullOrder nullOrder) { + this.nullOrder = nullOrder; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java index 2a8b1c0..54b0fb8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java @@ -122,8 +122,16 @@ public String getOrderExplain() { builder.append(", "); } builder.append(expression.getExprNode().getExprString()); - if (expression.getOrder() == PTFInvocationSpec.Order.DESC) { - builder.append("(DESC)"); + builder.append(" "); + if (expression.getOrder() == PTFInvocationSpec.Order.ASC) { + builder.append("ASC "); + } else { + builder.append("DESC "); + } + if (expression.getNullOrder() == PTFInvocationSpec.NullOrder.NULLS_FIRST) { + builder.append("NULLS FIRST"); + } else { + builder.append("NULLS LAST"); } } return builder.toString(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java index 2c076f50..2ac4039 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java @@ -27,19 +27,18 @@ import java.util.Map; import org.apache.commons.lang.ArrayUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.PTFOperator; import org.apache.hadoop.hive.ql.exec.PTFPartition; -import org.apache.hadoop.hive.ql.exec.WindowFunctionInfo; import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator; import org.apache.hadoop.hive.ql.exec.PTFRollingPartition; 
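To illustrate the getOrderExplain change above, a small self-contained sketch (assumptions: the enum names mirror PTFInvocationSpec, and the column name _col0 is made up) of the explain text now emitted per order expression:

public class OrderExplainExample {
  enum Order { ASC, DESC }
  enum NullOrder { NULLS_FIRST, NULLS_LAST }

  // Mirrors the string building in PartitionedTableFunctionDef.getOrderExplain().
  static String explain(String expr, Order order, NullOrder nullOrder) {
    StringBuilder b = new StringBuilder(expr).append(' ');
    b.append(order == Order.ASC ? "ASC " : "DESC ");
    b.append(nullOrder == NullOrder.NULLS_FIRST ? "NULLS FIRST" : "NULLS LAST");
    return b.toString();
  }

  public static void main(String[] args) {
    System.out.println(explain("_col0", Order.ASC, NullOrder.NULLS_FIRST)); // _col0 ASC NULLS FIRST
  }
}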
+import org.apache.hadoop.hive.ql.exec.WindowFunctionInfo; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.NullOrder; import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec; @@ -62,6 +61,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @SuppressWarnings("deprecation") public class WindowingTableFunction extends TableFunctionEvaluator { @@ -110,6 +111,7 @@ public void execute(PTFPartitionIterator pItr, PTFPartition outP) throws WindowTableFunctionDef wTFnDef = (WindowTableFunctionDef) getTableDef(); Order order = wTFnDef.getOrder().getExpressions().get(0).getOrder(); + NullOrder nullOrder = wTFnDef.getOrder().getExpressions().get(0).getNullOrder(); for(WindowFunctionDef wFn : wTFnDef.getWindowFunctions()) { boolean processWindow = processWindow(wFn); @@ -121,7 +123,7 @@ public void execute(PTFPartitionIterator pItr, PTFPartition outP) throws } oColumns.add((List)out); } else { - oColumns.add(executeFnwithWindow(getQueryDef(), wFn, iPart, order)); + oColumns.add(executeFnwithWindow(getQueryDef(), wFn, iPart, order, nullOrder)); } } @@ -421,7 +423,7 @@ public void startPartition() throws HiveException { int rowToProcess = streamingState.rollingPart.rowToProcess(wFn.getWindowFrame()); if (rowToProcess >= 0) { Range rng = getRange(wFn, rowToProcess, streamingState.rollingPart, - streamingState.order); + streamingState.order, streamingState.nullOrder); PTFPartitionIterator rItr = rng.iterator(); PTFOperator.connectLeadLagFunctionsToPartition(ptfDesc, rItr); Object out = evaluateWindowFunction(wFn, rItr); @@ -499,7 +501,7 @@ public void startPartition() throws HiveException { int rowToProcess = streamingState.rollingPart.size() - numRowsRemaining; if (rowToProcess >= 0) { Range rng = getRange(wFn, rowToProcess, streamingState.rollingPart, - streamingState.order); + streamingState.order, streamingState.nullOrder); PTFPartitionIterator rItr = rng.iterator(); PTFOperator.connectLeadLagFunctionsToPartition(ptfDesc, rItr); Object out = evaluateWindowFunction(wFn, rItr); @@ -659,11 +661,12 @@ public boolean carryForwardNames() { ArrayList executeFnwithWindow(PTFDesc ptfDesc, WindowFunctionDef wFnDef, PTFPartition iPart, - Order order) + Order order, + NullOrder nullOrder) throws HiveException { ArrayList vals = new ArrayList(); for(int i=0; i < iPart.size(); i++) { - Range rng = getRange(wFnDef, i, iPart, order); + Range rng = getRange(wFnDef, i, iPart, order, nullOrder); PTFPartitionIterator rItr = rng.iterator(); PTFOperator.connectLeadLagFunctionsToPartition(ptfDesc, rItr); Object out = evaluateWindowFunction(wFnDef, rItr); @@ -672,7 +675,7 @@ public boolean carryForwardNames() { return vals; } - private Range getRange(WindowFunctionDef wFnDef, int currRow, PTFPartition p, Order order) throws HiveException + private Range getRange(WindowFunctionDef wFnDef, int currRow, PTFPartition p, Order order, NullOrder nullOrder) throws HiveException { BoundaryDef startB = wFnDef.getWindowFrame().getStart(); BoundaryDef endB = wFnDef.getWindowFrame().getEnd(); @@ -691,10 +694,10 @@ private Range getRange(WindowFunctionDef wFnDef, int currRow, PTFPartition p, Or else { 
ValueBoundaryScanner vbs; if ( startB instanceof ValueBoundaryDef ) { - vbs = ValueBoundaryScanner.getScanner((ValueBoundaryDef)startB, order); + vbs = ValueBoundaryScanner.getScanner((ValueBoundaryDef)startB, order, nullOrder); } else { - vbs = ValueBoundaryScanner.getScanner((ValueBoundaryDef)endB, order); + vbs = ValueBoundaryScanner.getScanner((ValueBoundaryDef)endB, order, nullOrder); } vbs.reset(startB); start = vbs.computeStart(currRow, p); @@ -775,12 +778,14 @@ public Range(int start, int end, PTFPartition p) { BoundaryDef bndDef; Order order; + NullOrder nullOrder; PTFExpressionDef expressionDef; - public ValueBoundaryScanner(BoundaryDef bndDef, Order order, PTFExpressionDef expressionDef) + public ValueBoundaryScanner(BoundaryDef bndDef, Order order, NullOrder nullOrder, PTFExpressionDef expressionDef) { this.bndDef = bndDef; this.order = order; + this.nullOrder = nullOrder; this.expressionDef = expressionDef; } @@ -1135,7 +1140,7 @@ public Object computeValue(Object row) throws HiveException { @SuppressWarnings("incomplete-switch") - public static ValueBoundaryScanner getScanner(ValueBoundaryDef vbDef, Order order) + public static ValueBoundaryScanner getScanner(ValueBoundaryDef vbDef, Order order, NullOrder nullOrder) throws HiveException { PrimitiveObjectInspector pOI = (PrimitiveObjectInspector) vbDef.getOI(); switch(pOI.getPrimitiveCategory()) { @@ -1144,16 +1149,16 @@ public static ValueBoundaryScanner getScanner(ValueBoundaryDef vbDef, Order orde case LONG: case SHORT: case TIMESTAMP: - return new LongValueBoundaryScanner(vbDef, order, vbDef.getExpressionDef()); + return new LongValueBoundaryScanner(vbDef, order, nullOrder, vbDef.getExpressionDef()); case DOUBLE: case FLOAT: - return new DoubleValueBoundaryScanner(vbDef, order, vbDef.getExpressionDef()); + return new DoubleValueBoundaryScanner(vbDef, order, nullOrder, vbDef.getExpressionDef()); case DECIMAL: - return new HiveDecimalValueBoundaryScanner(vbDef, order, vbDef.getExpressionDef()); + return new HiveDecimalValueBoundaryScanner(vbDef, order, nullOrder, vbDef.getExpressionDef()); case DATE: - return new DateValueBoundaryScanner(vbDef, order, vbDef.getExpressionDef()); + return new DateValueBoundaryScanner(vbDef, order, nullOrder, vbDef.getExpressionDef()); case STRING: - return new StringValueBoundaryScanner(vbDef, order, vbDef.getExpressionDef()); + return new StringValueBoundaryScanner(vbDef, order, nullOrder, vbDef.getExpressionDef()); } throw new HiveException( String.format("Internal Error: attempt to setup a Window for datatype %s", @@ -1162,9 +1167,9 @@ public static ValueBoundaryScanner getScanner(ValueBoundaryDef vbDef, Order orde } public static class LongValueBoundaryScanner extends ValueBoundaryScanner { - public LongValueBoundaryScanner(BoundaryDef bndDef, Order order, + public LongValueBoundaryScanner(BoundaryDef bndDef, Order order, NullOrder nullOrder, PTFExpressionDef expressionDef) { - super(bndDef,order,expressionDef); + super(bndDef,order,nullOrder,expressionDef); } @Override @@ -1196,8 +1201,8 @@ public boolean isEqual(Object v1, Object v2) { public static class DoubleValueBoundaryScanner extends ValueBoundaryScanner { public DoubleValueBoundaryScanner(BoundaryDef bndDef, Order order, - PTFExpressionDef expressionDef) { - super(bndDef,order,expressionDef); + NullOrder nullOrder, PTFExpressionDef expressionDef) { + super(bndDef,order,nullOrder,expressionDef); } @Override @@ -1229,8 +1234,8 @@ public boolean isEqual(Object v1, Object v2) { public static class HiveDecimalValueBoundaryScanner 
extends ValueBoundaryScanner { public HiveDecimalValueBoundaryScanner(BoundaryDef bndDef, Order order, - PTFExpressionDef expressionDef) { - super(bndDef,order,expressionDef); + NullOrder nullOrder, PTFExpressionDef expressionDef) { + super(bndDef,order,nullOrder,expressionDef); } @Override @@ -1262,8 +1267,8 @@ public boolean isEqual(Object v1, Object v2) { public static class DateValueBoundaryScanner extends ValueBoundaryScanner { public DateValueBoundaryScanner(BoundaryDef bndDef, Order order, - PTFExpressionDef expressionDef) { - super(bndDef,order,expressionDef); + NullOrder nullOrder, PTFExpressionDef expressionDef) { + super(bndDef,order,nullOrder,expressionDef); } @Override @@ -1290,8 +1295,8 @@ public boolean isEqual(Object v1, Object v2) { public static class StringValueBoundaryScanner extends ValueBoundaryScanner { public StringValueBoundaryScanner(BoundaryDef bndDef, Order order, - PTFExpressionDef expressionDef) { - super(bndDef,order,expressionDef); + NullOrder nullOrder, PTFExpressionDef expressionDef) { + super(bndDef,order,nullOrder,expressionDef); } @Override @@ -1347,6 +1352,7 @@ public int size() { int[] wFnsToProcess; WindowTableFunctionDef wTFnDef; Order order; + NullOrder nullOrder; PTFDesc ptfDesc; StructObjectInspector inputOI; AggregationBuffer[] aggBuffers; @@ -1362,6 +1368,7 @@ public int size() { this.currIdx = 0; wTFnDef = (WindowTableFunctionDef) getTableDef(); order = wTFnDef.getOrder().getExpressions().get(0).getOrder(); + nullOrder = wTFnDef.getOrder().getExpressions().get(0).getNullOrder(); ptfDesc = getQueryDef(); inputOI = iPart.getOutputOI(); @@ -1416,7 +1423,7 @@ public Object next() { out = ObjectInspectorUtils.copyToStandardObject(out, wFn.getOI()); output.set(j, out); } else { - Range rng = getRange(wFn, currIdx, iPart, order); + Range rng = getRange(wFn, currIdx, iPart, order, nullOrder); PTFPartitionIterator rItr = rng.iterator(); PTFOperator.connectLeadLagFunctionsToPartition(ptfDesc, rItr); output.set(j, evaluateWindowFunction(wFn, rItr)); @@ -1453,6 +1460,7 @@ public void remove() { AggregationBuffer[] aggBuffers; Object[][] funcArgs; Order order; + NullOrder nullOrder; RankLimit rnkLimit; @SuppressWarnings("unchecked") @@ -1467,6 +1475,7 @@ public void remove() { precedingSpan, followingSpan); order = tabDef.getOrder().getExpressions().get(0).getOrder(); + nullOrder = tabDef.getOrder().getExpressions().get(0).getNullOrder(); int numFns = tabDef.getWindowFunctions().size(); fnOutputs = new ArrayList[numFns]; diff --git ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java index cf7eb70..7f6430f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java +++ ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java @@ -224,7 +224,7 @@ public void inputSplitNullBase() throws Exception { @Test public void sortedTable() throws Exception { List sortCols = new ArrayList(1); - sortCols.add(new Order("b", 1)); + sortCols.add(new Order("b", 1, 0)); Table t = newTable("default", "st", false, new HashMap(), sortCols, false); @@ -249,7 +249,7 @@ public void sortedTable() throws Exception { @Test public void sortedPartition() throws Exception { List sortCols = new ArrayList(1); - sortCols.add(new Order("b", 1)); + sortCols.add(new Order("b", 1, 0)); Table t = newTable("default", "sp", true, new HashMap(), sortCols, false); Partition p = newPartition(t, "today", sortCols); diff --git ql/src/test/queries/clientpositive/order_null.q 
ql/src/test/queries/clientpositive/order_null.q new file mode 100644 index 0000000..61a2c16 --- /dev/null +++ ql/src/test/queries/clientpositive/order_null.q @@ -0,0 +1,29 @@ +create table src_null (a int, b string); +insert into src_null values (1, 'A'); +insert into src_null values (null, null); +insert into src_null values (2, 'B'); +insert into src_null values (2, 'A'); +insert into src_null values (2, null); +insert into src_null values (3, null); + +SELECT x.* FROM src_null x ORDER BY a asc; + +SELECT x.* FROM src_null x ORDER BY a desc; + +SELECT x.* FROM src_null x ORDER BY b asc; + +SELECT x.* FROM src_null x ORDER BY b desc; + +SELECT x.* FROM src_null x ORDER BY a asc nulls first; + +SELECT x.* FROM src_null x ORDER BY a desc nulls first; + +SELECT x.* FROM src_null x ORDER BY b asc nulls last; + +SELECT x.* FROM src_null x ORDER BY b desc nulls last; + +SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc; + +SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last; + +SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last; diff --git ql/src/test/results/clientpositive/order_null.q.out ql/src/test/results/clientpositive/order_null.q.out new file mode 100644 index 0000000..73ed5d8 --- /dev/null +++ ql/src/test/results/clientpositive/order_null.q.out @@ -0,0 +1,222 @@ +PREHOOK: query: create table src_null (a int, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_null +POSTHOOK: query: create table src_null (a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_null +PREHOOK: query: insert into src_null values (1, 'A') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (1, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into src_null values (null, null) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into src_null values (2, 'B') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, 'B') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into src_null values (2, 'A') 
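As a rough reference for the default behaviour exercised by order_null.q (NULL sorting lowest: first under ASC, last under DESC), a minimal JDK-only sketch, not part of the patch, reusing the test data for column a:

import java.util.Arrays;
import java.util.Comparator;

public class DefaultNullOrderExample {
  public static void main(String[] args) {
    Integer[] a = {1, null, 2, 2, 2, 3};
    // ORDER BY a ASC (default NULLS FIRST)
    Arrays.sort(a, Comparator.nullsFirst(Comparator.naturalOrder()));
    System.out.println(Arrays.toString(a)); // [null, 1, 2, 2, 2, 3]
    // ORDER BY a DESC (default NULLS LAST)
    Arrays.sort(a, Comparator.nullsLast(Comparator.reverseOrder()));
    System.out.println(Arrays.toString(a)); // [3, 2, 2, 2, 1, null]
  }
}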
+PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into src_null values (2, null) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into src_null values (3, null) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (3, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +NULL NULL +1 A +2 NULL +2 A +2 B +3 NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +3 NULL +2 NULL +2 A +2 B +1 A +NULL NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +3 NULL +2 NULL +NULL NULL +2 A +1 A +2 B +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +2 B +2 A +1 A +3 NULL +2 NULL +NULL NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +NULL NULL +1 A +2 NULL +2 A +2 B +3 
NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +NULL NULL +3 NULL +2 NULL +2 A +2 B +1 A +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +2 A +1 A +2 B +3 NULL +2 NULL +NULL NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +2 B +2 A +1 A +3 NULL +2 NULL +NULL NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +1 A +2 B +2 A +2 NULL +3 NULL +NULL NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +2 B +2 A +1 A +3 NULL +2 NULL +NULL NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +2 NULL +3 NULL +NULL NULL +1 A +2 A +2 B diff --git ql/src/test/results/clientpositive/tez/order_null.q.out ql/src/test/results/clientpositive/tez/order_null.q.out new file mode 100644 index 0000000..4899946 --- /dev/null +++ ql/src/test/results/clientpositive/tez/order_null.q.out @@ -0,0 +1,222 @@ +PREHOOK: query: create table src_null (a int, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_null +POSTHOOK: query: create table src_null (a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_null +PREHOOK: query: insert into src_null values (1, 'A') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (1, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into src_null 
values (null, null) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into src_null values (2, 'B') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, 'B') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into src_null values (2, 'A') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into src_null values (2, null) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (2, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: insert into src_null values (3, null) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@src_null +POSTHOOK: query: insert into src_null values (3, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +NULL NULL +1 A +2 NULL +2 B +2 A +3 NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: 
SELECT x.* FROM src_null x ORDER BY a desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +3 NULL +2 NULL +2 B +2 A +1 A +NULL NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +NULL NULL +2 NULL +3 NULL +1 A +2 A +2 B +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +2 B +1 A +2 A +NULL NULL +2 NULL +3 NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls first +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +NULL NULL +1 A +2 NULL +2 B +2 A +3 NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a desc nulls first +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +NULL NULL +3 NULL +2 NULL +2 B +2 A +1 A +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +1 A +2 A +2 B +NULL NULL +2 NULL +3 NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +2 B +1 A +2 A +NULL NULL +2 NULL +3 NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY a asc nulls last, b desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +1 A +2 B +2 A +2 NULL +3 NULL +NULL NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b desc nulls last, a desc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +2 B +2 A +1 A +3 NULL +2 NULL +NULL NULL +PREHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: SELECT x.* FROM src_null x ORDER BY b asc nulls first, a asc nulls last +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +2 NULL +3 NULL 
+NULL NULL +1 A +2 A +2 B diff --git serde/if/serde.thrift serde/if/serde.thrift index 40d5f47..0b3804d 100644 --- serde/if/serde.thrift +++ serde/if/serde.thrift @@ -30,6 +30,7 @@ const string SERIALIZATION_NULL_FORMAT = "serialization.null.format" const string SERIALIZATION_ESCAPE_CRLF = "serialization.escape.crlf" const string SERIALIZATION_LAST_COLUMN_TAKES_REST = "serialization.last.column.takes.rest" const string SERIALIZATION_SORT_ORDER = "serialization.sort.order" +const string SERIALIZATION_NULL_POSITION = "serialization.null.position"; const string SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object" const string SERIALIZATION_ENCODING = "serialization.encoding" diff --git serde/src/gen/thrift/gen-cpp/serde_constants.cpp serde/src/gen/thrift/gen-cpp/serde_constants.cpp index 243d3b8..75701e2 100644 --- serde/src/gen/thrift/gen-cpp/serde_constants.cpp +++ serde/src/gen/thrift/gen-cpp/serde_constants.cpp @@ -27,6 +27,8 @@ serdeConstants::serdeConstants() { SERIALIZATION_SORT_ORDER = "serialization.sort.order"; + SERIALIZATION_NULL_POSITION = "serialization.null.position"; + SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object"; SERIALIZATION_ENCODING = "serialization.encoding"; diff --git serde/src/gen/thrift/gen-cpp/serde_constants.h serde/src/gen/thrift/gen-cpp/serde_constants.h index 3566ead..6d85928 100644 --- serde/src/gen/thrift/gen-cpp/serde_constants.h +++ serde/src/gen/thrift/gen-cpp/serde_constants.h @@ -23,6 +23,7 @@ class serdeConstants { std::string SERIALIZATION_ESCAPE_CRLF; std::string SERIALIZATION_LAST_COLUMN_TAKES_REST; std::string SERIALIZATION_SORT_ORDER; + std::string SERIALIZATION_NULL_POSITION; std::string SERIALIZATION_USE_JSON_OBJECTS; std::string SERIALIZATION_ENCODING; std::string FIELD_DELIM; diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java index 8b3eeb7..5cbbfaa 100644 --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java @@ -52,6 +52,8 @@ public static final String SERIALIZATION_SORT_ORDER = "serialization.sort.order"; + public static final String SERIALIZATION_NULL_POSITION = "serialization.null.position"; + public static final String SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object"; public static final String SERIALIZATION_ENCODING = "serialization.encoding"; diff --git serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php index 8370698..0bc6dd7 100644 --- serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php +++ serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php @@ -26,6 +26,7 @@ final class Constant extends \Thrift\Type\TConstant { static protected $SERIALIZATION_ESCAPE_CRLF; static protected $SERIALIZATION_LAST_COLUMN_TAKES_REST; static protected $SERIALIZATION_SORT_ORDER; + static protected $SERIALIZATION_NULL_POSITION; static protected $SERIALIZATION_USE_JSON_OBJECTS; static protected $SERIALIZATION_ENCODING; static protected $FIELD_DELIM; @@ -97,6 +98,10 @@ final class Constant extends \Thrift\Type\TConstant { return "serialization.sort.order"; } + static protected function init_SERIALIZATION_NULL_POSITION() { + return "serialization.null.position"; + } + static protected function init_SERIALIZATION_USE_JSON_OBJECTS() { return 
"serialization.use.json.object"; } diff --git serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py index 6ef3bcf..7939791 100644 --- serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py +++ serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py @@ -17,6 +17,7 @@ SERIALIZATION_ESCAPE_CRLF = "serialization.escape.crlf" SERIALIZATION_LAST_COLUMN_TAKES_REST = "serialization.last.column.takes.rest" SERIALIZATION_SORT_ORDER = "serialization.sort.order" +SERIALIZATION_NULL_POSITION = "serialization.null.position" SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object" SERIALIZATION_ENCODING = "serialization.encoding" FIELD_DELIM = "field.delim" diff --git serde/src/gen/thrift/gen-rb/serde_constants.rb serde/src/gen/thrift/gen-rb/serde_constants.rb index f98441b..d09e3c2 100644 --- serde/src/gen/thrift/gen-rb/serde_constants.rb +++ serde/src/gen/thrift/gen-rb/serde_constants.rb @@ -23,6 +23,8 @@ SERIALIZATION_LAST_COLUMN_TAKES_REST = %q"serialization.last.column.takes.rest" SERIALIZATION_SORT_ORDER = %q"serialization.sort.order" +SERIALIZATION_NULL_POSITION = %q"serialization.null.position" + SERIALIZATION_USE_JSON_OBJECTS = %q"serialization.use.json.object" SERIALIZATION_ENCODING = %q"serialization.encoding" diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java index 144ea5a..4e7eb2b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java @@ -28,8 +28,6 @@ import java.util.Map; import java.util.Properties; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -92,16 +90,18 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * BinarySortableSerDe can be used to write data in a way that the data can be * compared byte-by-byte with the same order. * - * The data format: NULL: a single byte \0 NON-NULL Primitives: ALWAYS prepend a - * single byte \1, and then: Boolean: FALSE = \1, TRUE = \2 Byte: flip the - * sign-bit to make sure negative comes before positive Short: flip the sign-bit - * to make sure negative comes before positive Int: flip the sign-bit to make - * sure negative comes before positive Long: flip the sign-bit to make sure + * The data format: NULL: a single byte (\0 or \1, check below) NON-NULL Primitives: + * ALWAYS prepend a single byte (\0 or \1), and then: Boolean: FALSE = \1, TRUE = \2 + * Byte: flip the sign-bit to make sure negative comes before positive Short: flip the + * sign-bit to make sure negative comes before positive Int: flip the sign-bit to + * make sure negative comes before positive Long: flip the sign-bit to make sure * negative comes before positive Double: flip the sign-bit for positive double, * and all bits for negative double values String: NULL-terminated UTF-8 string, * with NULL escaped to \1 \1, and \1 escaped to \1 \2 NON-NULL Complex Types: @@ -115,14 +115,23 @@ * field should be sorted ascendingly, and "-" means descendingly. 
The sub * fields in the same top-level field will have the same sort order. * + * This SerDe takes an additional parameter SERIALIZATION_NULL_POSITION which is a + * string containing only "a" and "z". The length of the string should equal to + * the number of fields in the top-level struct for serialization. "a" means that + * NULL should come first (thus, single byte is \0 for ascending order, \1 + * for descending order), while "z" means that NULL should come last (thus, single + * byte is \1 for ascending order, \0 for descending order). */ @SerDeSpec(schemaProps = { serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES, - serdeConstants.SERIALIZATION_SORT_ORDER}) + serdeConstants.SERIALIZATION_SORT_ORDER, serdeConstants.SERIALIZATION_NULL_POSITION}) public class BinarySortableSerDe extends AbstractSerDe { public static final Logger LOG = LoggerFactory.getLogger(BinarySortableSerDe.class.getName()); + public static final byte ZERO = (byte) 0; + public static final byte ONE = (byte) 1; + List columnNames; List columnTypes; @@ -130,6 +139,8 @@ StructObjectInspector rowObjectInspector; boolean[] columnSortOrderIsDesc; + byte[] columnNullMarker; + byte[] columnNotNullMarker; public static Charset decimalCharSet = Charset.forName("US-ASCII"); @@ -170,6 +181,37 @@ public void initialize(Configuration conf, Properties tbl) columnSortOrderIsDesc[i] = (columnSortOrder != null && columnSortOrder .charAt(i) == '-'); } + + // Null first/last + String columnNullOrder = tbl + .getProperty(serdeConstants.SERIALIZATION_NULL_POSITION); + columnNullMarker = new byte[columnNames.size()]; + columnNotNullMarker = new byte[columnNames.size()]; + for (int i = 0; i < columnSortOrderIsDesc.length; i++) { + if (columnSortOrderIsDesc[i]) { + // Descending + if (columnNullOrder != null && columnNullOrder.charAt(i) == 'a') { + // Null first + columnNullMarker[i] = ONE; + columnNotNullMarker[i] = ZERO; + } else { + // Null last (default for descending order) + columnNullMarker[i] = ZERO; + columnNotNullMarker[i] = ONE; + } + } else { + // Ascending + if (columnNullOrder != null && columnNullOrder.charAt(i) == 'z') { + // Null last + columnNullMarker[i] = ONE; + columnNotNullMarker[i] = ZERO; + } else { + // Null first (default for ascending order) + columnNullMarker[i] = ZERO; + columnNotNullMarker[i] = ONE; + } + } + } } @Override @@ -193,7 +235,7 @@ public Object deserialize(Writable blob) throws SerDeException { try { for (int i = 0; i < columnNames.size(); i++) { row.set(i, deserialize(inputByteBuffer, columnTypes.get(i), - columnSortOrderIsDesc[i], row.get(i))); + columnSortOrderIsDesc[i], columnNullMarker[i], columnNotNullMarker[i], row.get(i))); } } catch (IOException e) { throw new SerDeException(e); @@ -203,14 +245,14 @@ public Object deserialize(Writable blob) throws SerDeException { } static Object deserialize(InputByteBuffer buffer, TypeInfo type, - boolean invert, Object reuse) throws IOException { + boolean invert, byte nullMarker, byte notNullMarker, Object reuse) throws IOException { // Is this field a null? 
byte isNull = buffer.read(invert); - if (isNull == 0) { + if (isNull == nullMarker) { return null; } - assert (isNull == 1); + assert (isNull == notNullMarker); switch (type.getCategory()) { case PRIMITIVE: { @@ -475,7 +517,7 @@ static Object deserialize(InputByteBuffer buffer, TypeInfo type, if (size == r.size()) { r.add(null); } - r.set(size, deserialize(buffer, etype, invert, r.get(size))); + r.set(size, deserialize(buffer, etype, invert, nullMarker, notNullMarker, r.get(size))); size++; } // Remove additional elements if the list is reused @@ -506,8 +548,8 @@ static Object deserialize(InputByteBuffer buffer, TypeInfo type, } // \1 followed by each key and then each value assert (more == 1); - Object k = deserialize(buffer, ktype, invert, null); - Object v = deserialize(buffer, vtype, invert, null); + Object k = deserialize(buffer, ktype, invert, nullMarker, notNullMarker, null); + Object v = deserialize(buffer, vtype, invert, nullMarker, notNullMarker, null); r.put(k, v); } return r; @@ -527,7 +569,7 @@ static Object deserialize(InputByteBuffer buffer, TypeInfo type, // Read one field by one field for (int eid = 0; eid < size; eid++) { r - .set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r + .set(eid, deserialize(buffer, fieldTypes.get(eid), invert, nullMarker, notNullMarker, r .get(eid))); } return r; @@ -540,7 +582,7 @@ static Object deserialize(InputByteBuffer buffer, TypeInfo type, byte tag = buffer.read(invert); r.setTag(tag); r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), - invert, null)); + invert, nullMarker, notNullMarker, null)); return r; } default: { @@ -626,7 +668,8 @@ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDe for (int i = 0; i < columnNames.size(); i++) { serialize(output, soi.getStructFieldData(obj, fields.get(i)), - fields.get(i).getFieldObjectInspector(), columnSortOrderIsDesc[i]); + fields.get(i).getFieldObjectInspector(), columnSortOrderIsDesc[i], + columnNullMarker[i], columnNotNullMarker[i]); } serializeBytesWritable.set(output.getData(), 0, output.getLength()); @@ -641,14 +684,14 @@ public static void writeByte(RandomAccessOutput buffer, byte b, boolean invert) } static void serialize(ByteStream.Output buffer, Object o, ObjectInspector oi, - boolean invert) throws SerDeException { + boolean invert, byte nullMarker, byte notNullMarker) throws SerDeException { // Is this field a null? if (o == null) { - writeByte(buffer, (byte) 0, invert); + writeByte(buffer, nullMarker, invert); return; } // This field is not a null. 
- writeByte(buffer, (byte) 1, invert); + writeByte(buffer, notNullMarker, invert); switch (oi.getCategory()) { case PRIMITIVE: { @@ -786,7 +829,7 @@ static void serialize(ByteStream.Output buffer, Object o, ObjectInspector oi, int size = loi.getListLength(o); for (int eid = 0; eid < size; eid++) { writeByte(buffer, (byte) 1, invert); - serialize(buffer, loi.getListElement(o, eid), eoi, invert); + serialize(buffer, loi.getListElement(o, eid), eoi, invert, nullMarker, notNullMarker); } // and \0 to terminate writeByte(buffer, (byte) 0, invert); @@ -801,8 +844,8 @@ static void serialize(ByteStream.Output buffer, Object o, ObjectInspector oi, Map map = moi.getMap(o); for (Map.Entry entry : map.entrySet()) { writeByte(buffer, (byte) 1, invert); - serialize(buffer, entry.getKey(), koi, invert); - serialize(buffer, entry.getValue(), voi, invert); + serialize(buffer, entry.getKey(), koi, invert, nullMarker, notNullMarker); + serialize(buffer, entry.getValue(), voi, invert, nullMarker, notNullMarker); } // and \0 to terminate writeByte(buffer, (byte) 0, invert); @@ -814,7 +857,7 @@ static void serialize(ByteStream.Output buffer, Object o, ObjectInspector oi, for (int i = 0; i < fields.size(); i++) { serialize(buffer, soi.getStructFieldData(o, fields.get(i)), fields.get( - i).getFieldObjectInspector(), invert); + i).getFieldObjectInspector(), invert, nullMarker, notNullMarker); } return; } @@ -823,7 +866,7 @@ static void serialize(ByteStream.Output buffer, Object o, ObjectInspector oi, byte tag = uoi.getTag(o); writeByte(buffer, tag, invert); serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), - invert); + invert, nullMarker, notNullMarker); return; } default: { @@ -971,13 +1014,24 @@ public SerDeStats getSerDeStats() { } public static void serializeStruct(Output byteStream, Object[] fieldData, - List fieldOis, boolean[] sortableSortOrders) throws SerDeException { + List fieldOis, boolean[] sortableSortOrders, + byte[] nullMarkers, byte[] notNullMarkers) throws SerDeException { for (int i = 0; i < fieldData.length; i++) { - serialize(byteStream, fieldData[i], fieldOis.get(i), sortableSortOrders[i]); + serialize(byteStream, fieldData[i], fieldOis.get(i), sortableSortOrders[i], + nullMarkers[i], notNullMarkers[i]); } } public boolean[] getSortOrders() { return columnSortOrderIsDesc; } + + public byte[] getNullMarkers() { + return columnNullMarker; + } + + public byte[] getNotNullMarkers() { + return columnNotNullMarker; + } + } diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDeWithEndPrefix.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDeWithEndPrefix.java index ec43ae3..73e20a8 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDeWithEndPrefix.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDeWithEndPrefix.java @@ -28,7 +28,7 @@ public static void serializeStruct(Output byteStream, Object[] fieldData, List fieldOis, boolean endPrefix) throws SerDeException { for (int i = 0; i < fieldData.length; i++) { - serialize(byteStream, fieldData[i], fieldOis.get(i), false); + serialize(byteStream, fieldData[i], fieldOis.get(i), false, ZERO, ONE); } if (endPrefix) { if (fieldData[fieldData.length-1]!=null) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java index 9ea6e91..71d7237 100644 
--- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java @@ -23,8 +23,6 @@ import java.sql.Timestamp; import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -32,10 +30,12 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; +import org.apache.hadoop.hive.serde2.fast.SerializeWrite; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.fast.SerializeWrite; import org.apache.hive.common.util.DateUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /* * Directly serialize, field-by-field, the BinarySortable format. @@ -49,6 +49,9 @@ // The sort order (ascending/descending) for each field. Set to true when descending (invert). private boolean[] columnSortOrderIsDesc; + // Null first/last + private byte[] columnNullMarker; + private byte[] columnNotNullMarker; // Which field we are on. We start with -1 to be consistent in style with // BinarySortableDeserializeRead. @@ -58,20 +61,28 @@ private TimestampWritable tempTimestampWritable; - public BinarySortableSerializeWrite(boolean[] columnSortOrderIsDesc) { + public BinarySortableSerializeWrite(boolean[] columnSortOrderIsDesc, + byte[] columnNullMarker, byte[] columnNotNullMarker) { this(); fieldCount = columnSortOrderIsDesc.length; this.columnSortOrderIsDesc = columnSortOrderIsDesc; + this.columnNullMarker = columnNullMarker; + this.columnNotNullMarker = columnNotNullMarker; } /* * Use this constructor when only ascending sort order is used. + * By default for ascending order, NULL first. */ public BinarySortableSerializeWrite(int fieldCount) { this(); this.fieldCount = fieldCount; columnSortOrderIsDesc = new boolean[fieldCount]; Arrays.fill(columnSortOrderIsDesc, false); + columnNullMarker = new byte[fieldCount]; + Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO); + columnNotNullMarker = new byte[fieldCount]; + Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE); } // Not public since we must have the field count or column sort order information. @@ -112,7 +123,8 @@ public void reset() { */ @Override public void writeNull() throws IOException { - BinarySortableSerDe.writeByte(output, (byte) 0, columnSortOrderIsDesc[++index]); + ++index; + BinarySortableSerDe.writeByte(output, columnNullMarker[index], columnSortOrderIsDesc[index]); } /* @@ -120,10 +132,12 @@ public void writeNull() throws IOException { */ @Override public void writeBoolean(boolean v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.writeByte(output, (byte) (v ? 
2 : 1), invert); } @@ -133,10 +147,12 @@ public void writeBoolean(boolean v) throws IOException { */ @Override public void writeByte(byte v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.writeByte(output, (byte) (v ^ 0x80), invert); } @@ -146,10 +162,12 @@ public void writeByte(byte v) throws IOException { */ @Override public void writeShort(short v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeShort(output, v, invert); } @@ -159,10 +177,12 @@ public void writeShort(short v) throws IOException { */ @Override public void writeInt(int v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeInt(output, v, invert); } @@ -172,10 +192,12 @@ public void writeInt(int v) throws IOException { */ @Override public void writeLong(long v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeLong(output, v, invert); } @@ -185,10 +207,12 @@ public void writeLong(long v) throws IOException { */ @Override public void writeFloat(float vf) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeFloat(output, vf, invert); } @@ -198,10 +222,12 @@ public void writeFloat(float vf) throws IOException { */ @Override public void writeDouble(double vd) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeDouble(output, vd, invert); } @@ -214,20 +240,24 @@ public void writeDouble(double vd) throws IOException { */ @Override public void writeString(byte[] v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. 
- BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeBytes(output, v, 0, v.length, invert); } @Override public void writeString(byte[] v, int start, int length) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeBytes(output, v, start, length, invert); } @@ -257,20 +287,24 @@ public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException { */ @Override public void writeBinary(byte[] v) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeBytes(output, v, 0, v.length, invert); } @Override public void writeBinary(byte[] v, int start, int length) { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeBytes(output, v, start, length, invert); } @@ -280,10 +314,12 @@ public void writeBinary(byte[] v, int start, int length) { */ @Override public void writeDate(Date date) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeInt(output, DateWritable.dateToDays(date), invert); } @@ -291,10 +327,12 @@ public void writeDate(Date date) throws IOException { // We provide a faster way to write a date without a Date object. @Override public void writeDate(int dateAsDays) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeInt(output, dateAsDays, invert); } @@ -304,10 +342,12 @@ public void writeDate(int dateAsDays) throws IOException { */ @Override public void writeTimestamp(Timestamp vt) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. 
- BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); tempTimestampWritable.set(vt); BinarySortableSerDe.serializeTimestampWritable(output, tempTimestampWritable, invert); @@ -318,20 +358,24 @@ public void writeTimestamp(Timestamp vt) throws IOException { */ @Override public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeHiveIntervalYearMonth(output, viyt, invert); } @Override public void writeHiveIntervalYearMonth(int totalMonths) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeInt(output, totalMonths, invert); } @@ -341,20 +385,24 @@ public void writeHiveIntervalYearMonth(int totalMonths) throws IOException { */ @Override public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeHiveIntervalDayTime(output, vidt, invert); } @Override public void writeHiveIntervalDayTime(long totalNanos) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. - BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); long totalSecs = DateUtils.getIntervalDayTimeTotalSecondsFromTotalNanos(totalNanos); int nanos = DateUtils.getIntervalDayTimeNanosFromTotalNanos(totalNanos); @@ -367,10 +415,12 @@ public void writeHiveIntervalDayTime(long totalNanos) throws IOException { */ @Override public void writeHiveDecimal(HiveDecimal dec, int scale) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; + ++index; + + final boolean invert = columnSortOrderIsDesc[index]; // This field is not a null. 
- BinarySortableSerDe.writeByte(output, (byte) 1, invert); + BinarySortableSerDe.writeByte(output, columnNotNullMarker[index], invert); BinarySortableSerDe.serializeHiveDecimal(output, dec, invert); } diff --git serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java index ae476ae..c43eae2 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java @@ -23,8 +23,6 @@ import java.util.Map; import java.util.Random; -import junit.framework.TestCase; - import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.SerDe; @@ -33,20 +31,23 @@ import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.io.BytesWritable; +import junit.framework.TestCase; + public class TestBinarySortableFast extends TestCase { private void testBinarySortableFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, - boolean[] columnSortOrderIsDesc, SerDe serde, StructObjectInspector rowOI, boolean ascending, + boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker, + SerDe serde, StructObjectInspector rowOI, boolean ascending, Map primitiveTypeInfoMap) throws Throwable { - BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc); + BinarySortableSerializeWrite binarySortableSerializeWrite = + new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); // Try to serialize @@ -227,15 +228,24 @@ public void testBinarySortableFast() throws Throwable { String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); String order; order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '+'); - SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order); + String nullPosition; + nullPosition = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, 'a'); + SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullPosition); order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '-'); - SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order); + nullPosition = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, 'z'); + SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullPosition); boolean[] columnSortOrderIsDesc = new boolean[MyTestPrimitiveClass.primitiveCount]; Arrays.fill(columnSortOrderIsDesc, false); - testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, serde_ascending, rowOI, true, 
primitiveTypeInfoMap); + byte[] columnNullMarker = new byte[MyTestPrimitiveClass.primitiveCount]; + Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO); + byte[] columnNotNullMarker = new byte[MyTestPrimitiveClass.primitiveCount]; + Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE); + testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, columnNullMarker, + columnNotNullMarker, serde_ascending, rowOI, true, primitiveTypeInfoMap); Arrays.fill(columnSortOrderIsDesc, true); - testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, serde_descending, rowOI, false, primitiveTypeInfoMap); + testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, columnNullMarker, + columnNotNullMarker, serde_descending, rowOI, false, primitiveTypeInfoMap); } catch (Throwable e) { e.printStackTrace(); throw e; diff --git serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java index af47e6f..72e0917 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java @@ -17,18 +17,12 @@ */ package org.apache.hadoop.hive.serde2.binarysortable; -import java.sql.Date; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Properties; import java.util.Random; -import junit.framework.TestCase; - import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -40,6 +34,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.BytesWritable; +import junit.framework.TestCase; + /** * TestBinarySortableSerDe. * @@ -66,12 +62,13 @@ public static String hexString(BytesWritable bytes) { return sb.toString(); } - public static SerDe getSerDe(String fieldNames, String fieldTypes, String order) + public static SerDe getSerDe(String fieldNames, String fieldTypes, String order, String nullPosition) throws Throwable { Properties schema = new Properties(); schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames); schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes); schema.setProperty(serdeConstants.SERIALIZATION_SORT_ORDER, order); + schema.setProperty(serdeConstants.SERIALIZATION_NULL_POSITION, nullPosition); BinarySortableSerDe serde = new BinarySortableSerDe(); SerDeUtils.initializeSerDe(serde, new Configuration(), schema, null); @@ -172,11 +169,14 @@ public void testBinarySortableSerDe() throws Throwable { String order; order = StringUtils.leftPad("", MyTestClass.fieldCount, '+'); + String nullPosition; + nullPosition = StringUtils.leftPad("", MyTestClass.fieldCount, 'a'); testBinarySortableSerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes, - order), true); + order, nullPosition), true); order = StringUtils.leftPad("", MyTestClass.fieldCount, '-'); + nullPosition = StringUtils.leftPad("", MyTestClass.fieldCount, 'z'); testBinarySortableSerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes, - order), false); + order, nullPosition), false); System.out.println("Test testTBinarySortableProtocol passed!"); } catch (Throwable e) {
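For reference, a minimal usage sketch (not part of the patch itself) of the new SERIALIZATION_NULL_POSITION property, modeled on the TestBinarySortableSerDe.getSerDe() helper changed above; the column names and types are invented for illustration.

    // Illustrative only: configure BinarySortableSerDe with per-column sort order and null position.
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    // One character per column: '+' = ascending, '-' = descending.
    schema.setProperty(serdeConstants.SERIALIZATION_SORT_ORDER, "+-");
    // One character per column: 'a' = NULLs first, 'z' = NULLs last.
    schema.setProperty(serdeConstants.SERIALIZATION_NULL_POSITION, "az");
    BinarySortableSerDe serde = new BinarySortableSerDe();
    SerDeUtils.initializeSerDe(serde, new Configuration(), schema, null);

With this configuration, initialize() derives a \0 null marker for the ascending "id" column, so NULLs sort first there, while the descending "name" column with 'z' gets a \0 marker that is bit-inverted on write, so NULLs sort last for that column.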
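Similarly, a sketch (again illustrative, assuming the SerializeWrite.set(Output) entry point) of the new three-argument BinarySortableSerializeWrite constructor, passing the per-column markers that BinarySortableSerDe.initialize() would otherwise compute.

    // Illustrative only: two columns, first ascending with NULLs first, second descending with NULLs last.
    boolean[] columnSortOrderIsDesc = new boolean[] { false, true };
    byte[] columnNullMarker = new byte[] { BinarySortableSerDe.ZERO, BinarySortableSerDe.ZERO };
    byte[] columnNotNullMarker = new byte[] { BinarySortableSerDe.ONE, BinarySortableSerDe.ONE };
    BinarySortableSerializeWrite serializeWrite =
        new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
    Output output = new Output();
    serializeWrite.set(output);
    serializeWrite.writeInt(42);  // non-NULL: not-null marker byte, then the sign-flipped int
    serializeWrite.writeNull();   // NULL: only the (possibly inverted) null marker byte for the second column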