diff --git a/data/files/type_evolution.avro b/data/files/type_evolution.avro new file mode 100644 index 0000000..3b6445f Binary files /dev/null and b/data/files/type_evolution.avro differ diff --git a/ql/src/test/queries/clientpositive/avro_type_evolution.q b/ql/src/test/queries/clientpositive/avro_type_evolution.q new file mode 100644 index 0000000..4c9539c --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_type_evolution.q @@ -0,0 +1,20 @@ +-- File Schema { "name" : "val", "type" : [ "null", "int" ] } +-- Record Schema { "name" : "val", "type" : [ "long", "null" ] } + +DROP TABLE IF EXISTS avro_type_evolution; + +CREATE TABLE avro_type_evolution (val bigint) STORED AS AVRO +TBLPROPERTIES ( + 'avro.schema.literal'='{ + "type" : "record", + "name" : "type_evolution", + "namespace" : "default", + "fields" : [ { + "name" : "val", + "type" : [ "long", "null" ] + } ] +}'); +LOAD DATA LOCAL INPATH '../../data/files/type_evolution.avro' OVERWRITE INTO TABLE avro_type_evolution; +SELECT * FROM avro_type_evolution; + +DROP TABLE avro_type_evolution; diff --git a/ql/src/test/results/clientpositive/avro_type_evolution.q.out b/ql/src/test/results/clientpositive/avro_type_evolution.q.out new file mode 100644 index 0000000..b302d9a --- /dev/null +++ b/ql/src/test/results/clientpositive/avro_type_evolution.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: -- File Schema { "name" : "val", "type" : [ "null", "int" ] } +-- Record Schema { "name" : "val", "type" : [ "long", "null" ] } + +DROP TABLE IF EXISTS avro_type_evolution +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- File Schema { "name" : "val", "type" : [ "null", "int" ] } +-- Record Schema { "name" : "val", "type" : [ "long", "null" ] } + +DROP TABLE IF EXISTS avro_type_evolution +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE avro_type_evolution (val bigint) STORED AS AVRO +TBLPROPERTIES ( + 'avro.schema.literal'='{ + "type" : "record", + "name" : "type_evolution", + "namespace" : "default", + "fields" : [ { + 
"name" : "val", + "type" : [ "long", "null" ] + } ] +}') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE avro_type_evolution (val bigint) STORED AS AVRO +TBLPROPERTIES ( + 'avro.schema.literal'='{ + "type" : "record", + "name" : "type_evolution", + "namespace" : "default", + "fields" : [ { + "name" : "val", + "type" : [ "long", "null" ] + } ] +}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_type_evolution +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/type_evolution.avro' OVERWRITE INTO TABLE avro_type_evolution +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@avro_type_evolution +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/type_evolution.avro' OVERWRITE INTO TABLE avro_type_evolution +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@avro_type_evolution +PREHOOK: query: SELECT * FROM avro_type_evolution +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_type_evolution +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_type_evolution +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_type_evolution +#### A masked pattern was here #### +1 +PREHOOK: query: DROP TABLE avro_type_evolution +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@avro_type_evolution +PREHOOK: Output: default@avro_type_evolution +POSTHOOK: query: DROP TABLE avro_type_evolution +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@avro_type_evolution +POSTHOOK: Output: default@avro_type_evolution diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index fc50d32..6890042 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -40,6 +40,7 @@ import 
org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; +import org.apache.avro.UnresolvedUnionException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveChar; @@ -309,8 +310,14 @@ private Object deserializeNullableUnion(Object datum, Schema fileSchema, Schema if (fileSchema.getType() == Type.UNION) { // The fileSchema may have the null value in a different position, so // we need to get the correct tag - tag = GenericData.get().resolveUnion(fileSchema, datum); - currentFileSchema = fileSchema.getTypes().get(tag); + try { + tag = GenericData.get().resolveUnion(fileSchema, datum); + currentFileSchema = fileSchema.getTypes().get(tag); + } catch (UnresolvedUnionException e) { + // Intentionally ignored: the datum's type differs between the + // file and record schema (e.g. the datum is a long while the + // field in the file schema is an int). + } } else { currentFileSchema = fileSchema; }