diff --git a/ql/src/test/queries/clientpositive/avro_type_evolution.q b/ql/src/test/queries/clientpositive/avro_type_evolution.q new file mode 100644 index 0000000..4c9539c --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_type_evolution.q @@ -0,0 +1,20 @@ +-- File Schema { "name" : "val", "type" : [ "null", "int" ] } +-- Record Schema { "name" : "val", "type" : [ "long", "null" ] } + +DROP TABLE IF EXISTS avro_type_evolution; + +CREATE TABLE avro_type_evolution (val bigint) STORED AS AVRO +TBLPROPERTIES ( + 'avro.schema.literal'='{ + "type" : "record", + "name" : "type_evolution", + "namespace" : "default", + "fields" : [ { + "name" : "val", + "type" : [ "long", "null" ] + } ] +}'); +LOAD DATA LOCAL INPATH '../../data/files/type_evolution.avro' OVERWRITE INTO TABLE avro_type_evolution; +SELECT * FROM avro_type_evolution; + +DROP TABLE avro_type_evolution; diff --git a/ql/src/test/results/clientpositive/avro_type_evolution.q.out b/ql/src/test/results/clientpositive/avro_type_evolution.q.out new file mode 100644 index 0000000..71ec99b --- /dev/null +++ b/ql/src/test/results/clientpositive/avro_type_evolution.q.out @@ -0,0 +1,63 @@ +PREHOOK: query: -- File Schema { "name" : "val", "type" : [ "null", "int" ] } +-- Record Schema { "name" : "val", "type" : [ "long", "null" ] } + +DROP TABLE IF EXISTS avro_type_evolution +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- File Schema { "name" : "val", "type" : [ "null", "int" ] } +-- Record Schema { "name" : "val", "type" : [ "long", "null" ] } + +DROP TABLE IF EXISTS avro_type_evolution +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE avro_type_evolution (val bigint) STORED AS AVRO +TBLPROPERTIES ( + 'avro.schema.literal'='{ + "type" : "record", + "name" : "type_evolution", + "namespace" : "default", + "fields" : [ { + "name" : "val", + "type" : [ "long", "null" ] + } ] +}') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_type_evolution +POSTHOOK: query: CREATE 
TABLE avro_type_evolution (val bigint) STORED AS AVRO +TBLPROPERTIES ( + 'avro.schema.literal'='{ + "type" : "record", + "name" : "type_evolution", + "namespace" : "default", + "fields" : [ { + "name" : "val", + "type" : [ "long", "null" ] + } ] +}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_type_evolution +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/type_evolution.avro' OVERWRITE INTO TABLE avro_type_evolution +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@avro_type_evolution +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/type_evolution.avro' OVERWRITE INTO TABLE avro_type_evolution +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@avro_type_evolution +PREHOOK: query: SELECT * FROM avro_type_evolution +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_type_evolution +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_type_evolution +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_type_evolution +#### A masked pattern was here #### +1 +PREHOOK: query: DROP TABLE avro_type_evolution +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@avro_type_evolution +PREHOOK: Output: default@avro_type_evolution +POSTHOOK: query: DROP TABLE avro_type_evolution +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@avro_type_evolution +POSTHOOK: Output: default@avro_type_evolution diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index d107291..a2558f2 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -41,6 +41,7 @@ import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; +import 
org.apache.avro.UnresolvedUnionException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveChar; @@ -316,8 +317,27 @@ private Object deserializeNullableUnion(Object datum, Schema fileSchema, Schema if (fileSchema.getType() == Type.UNION) { // The fileSchema may have the null value in a different position, so // we need to get the correct tag - tag = GenericData.get().resolveUnion(fileSchema, datum); - currentFileSchema = fileSchema.getTypes().get(tag); + try { + tag = GenericData.get().resolveUnion(fileSchema, datum); + currentFileSchema = fileSchema.getTypes().get(tag); + } catch (UnresolvedUnionException e) { + if (LOG.isDebugEnabled()) { + String datumClazz = null; + if (datum != null) { + datumClazz = datum.getClass().getName(); + } + String msg = "File schema union could not resolve union. fileSchema = " + fileSchema + + ", recordSchema = " + recordSchema + ", datum class = " + datumClazz + ": " + e; + LOG.debug(msg, e); + } + // This occurs when the datum type is different between + // the file and record schema. For example if datum is long + // and the field in the file schema is int. See HIVE-9462. + // In this case we will re-use the record schema as the file + // schema. Ultimately we need to clean this code up and will + // do so as a follow-on to HIVE-9462. + currentFileSchema = schema; + } } else { currentFileSchema = fileSchema; }