diff --git ql/src/test/queries/clientpositive/avro_add_column.q ql/src/test/queries/clientpositive/avro_add_column.q new file mode 100644 index 0000000..17dc2ff --- /dev/null +++ ql/src/test/queries/clientpositive/avro_add_column.q @@ -0,0 +1,17 @@ +-- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors ( + number int, + first_name string) +STORED AS AVRO; + +DESCRIBE doctors; + +ALTER TABLE doctors ADD COLUMNS (last_name string); + +DESCRIBE doctors; + +LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors; + +SELECT * FROM doctors; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/avro_add_column2.q ql/src/test/queries/clientpositive/avro_add_column2.q new file mode 100644 index 0000000..36999da --- /dev/null +++ ql/src/test/queries/clientpositive/avro_add_column2.q @@ -0,0 +1,25 @@ +-- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors ( + number int, + first_name string, + last_name string) +STORED AS AVRO; + +LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors; + +CREATE TABLE doctors_copy ( + number int, + first_name string) +STORED AS AVRO; + +INSERT INTO TABLE doctors_copy SELECT number, first_name FROM doctors; + +ALTER TABLE doctors_copy ADD COLUMNS (last_name string); + +LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors_copy; + +DESCRIBE doctors_copy; + +SELECT * FROM doctors_copy; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/avro_add_column3.q ql/src/test/queries/clientpositive/avro_add_column3.q new file mode 100644 index 0000000..f33ee03 --- /dev/null +++ ql/src/test/queries/clientpositive/avro_add_column3.q @@ -0,0 +1,24 @@ +-- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors ( + number int, + first_name string, + last_name string) +STORED AS AVRO; + +LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors; + +CREATE TABLE doctors_copy ( + number int, + first_name string) +PARTITIONED BY (part int) +STORED AS AVRO; + +INSERT INTO TABLE doctors_copy PARTITION(part=1) SELECT number, first_name FROM doctors; + +ALTER TABLE doctors_copy ADD COLUMNS (last_name string); + +DESCRIBE doctors_copy; + +SELECT * FROM doctors_copy; \ No newline at end of file diff --git ql/src/test/results/clientpositive/avro_add_column.q.out ql/src/test/results/clientpositive/avro_add_column.q.out new file mode 100644 index 0000000..163feca --- /dev/null +++ ql/src/test/results/clientpositive/avro_add_column.q.out @@ -0,0 +1,71 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors ( + number int, + first_name string) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors ( + number int, + first_name string) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@doctors +PREHOOK: query: DESCRIBE doctors +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@doctors +POSTHOOK: query: DESCRIBE doctors +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@doctors +number int from deserializer +first_name string from deserializer +PREHOOK: query: ALTER TABLE doctors ADD COLUMNS (last_name string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@doctors +PREHOOK: Output: default@doctors +POSTHOOK: query: ALTER TABLE doctors ADD COLUMNS (last_name string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@doctors +POSTHOOK: Output: default@doctors +PREHOOK: query: DESCRIBE doctors +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@doctors +POSTHOOK: query: DESCRIBE doctors +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@doctors +number int from deserializer +first_name string from deserializer +last_name string from deserializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@doctors +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@doctors +PREHOOK: query: SELECT * FROM doctors +PREHOOK: type: QUERY +PREHOOK: Input: default@doctors +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM doctors +POSTHOOK: type: QUERY +POSTHOOK: Input: default@doctors +#### A masked pattern was here #### +1 William Hartnell +10 David Tennant +11 Matt Smith +2 Patrick Troughton +3 Jon Pertwee +4 Tom Baker +5 Peter Davison +6 Colin Baker +7 Sylvester McCoy +8 Paul McGann +9 Christopher Eccleston diff --git ql/src/test/results/clientpositive/avro_add_column2.q.out ql/src/test/results/clientpositive/avro_add_column2.q.out new file mode 100644 index 0000000..32a6f5e --- /dev/null +++ ql/src/test/results/clientpositive/avro_add_column2.q.out @@ -0,0 +1,107 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors ( + number int, + first_name string, + last_name string) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors ( + number int, + first_name string, + last_name string) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@doctors +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@doctors +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@doctors +PREHOOK: query: CREATE TABLE doctors_copy ( + number int, + first_name string) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE doctors_copy ( + number int, + first_name string) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@doctors_copy +PREHOOK: query: INSERT INTO TABLE doctors_copy SELECT number, first_name FROM doctors +PREHOOK: type: QUERY +PREHOOK: Input: default@doctors +PREHOOK: Output: default@doctors_copy +POSTHOOK: query: INSERT INTO TABLE doctors_copy SELECT number, first_name FROM doctors +POSTHOOK: type: QUERY +POSTHOOK: Input: default@doctors +POSTHOOK: Output: default@doctors_copy +POSTHOOK: Lineage: doctors_copy.first_name SIMPLE [(doctors)doctors.FieldSchema(name:first_name, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: doctors_copy.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ] +PREHOOK: query: ALTER TABLE doctors_copy ADD COLUMNS (last_name string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@doctors_copy +PREHOOK: Output: default@doctors_copy +POSTHOOK: query: ALTER TABLE doctors_copy ADD COLUMNS (last_name string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@doctors_copy +POSTHOOK: Output: default@doctors_copy +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors_copy +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@doctors_copy +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors_copy +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@doctors_copy +PREHOOK: query: DESCRIBE doctors_copy +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@doctors_copy +POSTHOOK: query: DESCRIBE doctors_copy +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@doctors_copy +number int from deserializer +first_name string from deserializer +last_name string from deserializer +PREHOOK: query: SELECT * FROM doctors_copy +PREHOOK: type: QUERY +PREHOOK: Input: default@doctors_copy +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM doctors_copy +POSTHOOK: type: QUERY +POSTHOOK: Input: default@doctors_copy +#### A masked pattern was here #### +1 William Hartnell +1 William NULL +10 David NULL +10 David Tennant +11 Matt NULL +11 Matt Smith +2 Patrick NULL +2 Patrick Troughton +3 Jon NULL +3 Jon Pertwee +4 Tom Baker +4 Tom NULL +5 Peter Davison +5 Peter NULL +6 Colin Baker +6 Colin NULL +7 Sylvester McCoy +7 Sylvester NULL +8 Paul McGann +8 Paul NULL +9 Christopher Eccleston +9 Christopher NULL diff --git ql/src/test/results/clientpositive/avro_add_column3.q.out ql/src/test/results/clientpositive/avro_add_column3.q.out new file mode 100644 index 0000000..fe934e1 --- /dev/null +++ ql/src/test/results/clientpositive/avro_add_column3.q.out @@ -0,0 +1,98 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors ( + number int, + first_name string, + last_name string) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors ( + number int, + first_name string, + last_name string) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@doctors +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@doctors +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@doctors +PREHOOK: query: CREATE TABLE doctors_copy ( + number int, + first_name string) +PARTITIONED BY (part int) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE doctors_copy ( + number int, + first_name string) +PARTITIONED BY (part int) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@doctors_copy +PREHOOK: query: INSERT INTO TABLE doctors_copy PARTITION(part=1) SELECT number, first_name FROM doctors +PREHOOK: type: QUERY +PREHOOK: Input: default@doctors +PREHOOK: Output: default@doctors_copy@part=1 +POSTHOOK: query: INSERT INTO TABLE doctors_copy PARTITION(part=1) SELECT number, first_name FROM doctors +POSTHOOK: type: QUERY +POSTHOOK: Input: default@doctors +POSTHOOK: Output: default@doctors_copy@part=1 +POSTHOOK: Lineage: doctors_copy PARTITION(part=1).first_name SIMPLE [(doctors)doctors.FieldSchema(name:first_name, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: doctors_copy PARTITION(part=1).number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ] +PREHOOK: query: ALTER TABLE doctors_copy ADD COLUMNS (last_name string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@doctors_copy +PREHOOK: Output: default@doctors_copy +POSTHOOK: query: ALTER TABLE doctors_copy ADD COLUMNS (last_name string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@doctors_copy +POSTHOOK: Output: default@doctors_copy +PREHOOK: query: DESCRIBE doctors_copy +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@doctors_copy +POSTHOOK: query: DESCRIBE doctors_copy +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@doctors_copy +number int from deserializer +first_name string from deserializer +last_name string from deserializer +part int + +# Partition Information +# col_name data_type comment + +part int +PREHOOK: query: SELECT * FROM doctors_copy +PREHOOK: type: QUERY +PREHOOK: Input: default@doctors_copy +PREHOOK: Input: default@doctors_copy@part=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM doctors_copy +POSTHOOK: type: QUERY +POSTHOOK: Input: default@doctors_copy +POSTHOOK: Input: default@doctors_copy@part=1 +#### A masked pattern was here #### +1 William NULL 1 +10 David NULL 1 +11 Matt NULL 1 +2 Patrick NULL 1 +3 Jon NULL 1 +4 Tom NULL 1 +5 Peter NULL 1 +6 Colin NULL 1 +7 Sylvester NULL 1 +8 Paul NULL 1 +9 Christopher NULL 1 diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java index 915f016..497a49c 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java @@ -26,6 +26,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.node.JsonNodeFactory; import java.util.ArrayList; import java.util.Arrays; @@ -216,13 +218,14 @@ private Schema createAvroArray(TypeInfo typeInfo) { private List getFields(Schema.Field schemaField) { List fields = new ArrayList(); + JsonNode nullDefault = JsonNodeFactory.instance.nullNode(); if (schemaField.schema().getType() == Schema.Type.RECORD) { for (Schema.Field field : schemaField.schema().getFields()) { - fields.add(new Schema.Field(field.name(), field.schema(), field.doc(), null)); + fields.add(new Schema.Field(field.name(), field.schema(), field.doc(), nullDefault)); } } else { fields.add(new Schema.Field(schemaField.name(), schemaField.schema(), schemaField.doc(), - null)); + nullDefault)); } return fields; diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java index 722bdf9..0f53e31 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestTypeInfoToSchema.java @@ -86,7 +86,8 @@ private String genSchemaWithoutNull(String specificSchema) { "\"fields\":[" + "{\"name\":\"testCol\"," + "\"type\":" + specificSchema + "," + - "\"doc\":\"\"}" + + "\"doc\":\"\"," + + "\"default\":null}" + "]}"; } diff --git serde/src/test/resources/avro-nested-struct.avsc serde/src/test/resources/avro-nested-struct.avsc index 785af83..4363cca 100644 --- serde/src/test/resources/avro-nested-struct.avsc +++ serde/src/test/resources/avro-nested-struct.avsc @@ -5,15 +5,15 @@ "doc":"struct>", "fields": [ -{"name":"superfield1","type":["null","string"],"doc":"string"}, +{"name":"superfield1","type":["null","string"],"doc":"string","default":null}, {"name":"superfield2","type":["null",{"type":"record","name":"record_0", "doc":"struct", "fields": [ -{"name":"field1","type":["null","string"],"doc":"string"}, -{"name":"field2","type":["null","int"],"doc":"int"} +{"name":"field1","type":["null","string"],"doc":"string","default":null}, +{"name":"field2","type":["null","int"],"doc":"int","default":null} ] } ], -"doc":"struct"}] +"doc":"struct","default":null}] } \ No newline at end of file diff --git serde/src/test/resources/avro-struct.avsc serde/src/test/resources/avro-struct.avsc index 313c74f..c8c83d7 100644 --- serde/src/test/resources/avro-struct.avsc +++ serde/src/test/resources/avro-struct.avsc @@ -6,19 +6,19 @@ field6:smallint,field7:int,field8:bigint,field9:float,field10:double,field11:boolean, field12:decimal(4,2),field13:void>", "fields":[ -{"name":"field1","type":["null","string"],"doc":"string"}, -{"name":"field2","type":["null","string"],"doc":"char(5)"}, -{"name":"field3","type":["null","string"],"doc":"varchar(5)"}, -{"name":"field4","type":["null","bytes"],"doc":"binary"}, -{"name":"field5","type":["null","int"],"doc":"tinyint"}, -{"name":"field6","type":["null","int"],"doc":"smallint"}, -{"name":"field7","type":["null","int"],"doc":"int"}, -{"name":"field8","type":["null","long"],"doc":"bigint"}, -{"name":"field9","type":["null","float"],"doc":"float"}, -{"name":"field10","type":["null","double"],"doc":"double"}, -{"name":"field11","type":["null","boolean"],"doc":"boolean"}, +{"name":"field1","type":["null","string"],"doc":"string","default":null}, +{"name":"field2","type":["null","string"],"doc":"char(5)","default":null}, +{"name":"field3","type":["null","string"],"doc":"varchar(5)","default":null}, +{"name":"field4","type":["null","bytes"],"doc":"binary","default":null}, +{"name":"field5","type":["null","int"],"doc":"tinyint","default":null}, +{"name":"field6","type":["null","int"],"doc":"smallint","default":null}, +{"name":"field7","type":["null","int"],"doc":"int","default":null}, +{"name":"field8","type":["null","long"],"doc":"bigint","default":null}, +{"name":"field9","type":["null","float"],"doc":"float","default":null}, +{"name":"field10","type":["null","double"],"doc":"double","default":null}, +{"name":"field11","type":["null","boolean"],"doc":"boolean","default":null}, {"name":"field12","type":["null",{"type":"bytes","logicalType":"decimal","precision":4, -"scale":2}],"doc":"decimal(4,2)"}, -{"name":"field13","type":"null","doc":"void"} +"scale":2}],"doc":"decimal(4,2)","default":null}, +{"name":"field13","type":"null","doc":"void","default":null} ] } \ No newline at end of file