diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index 50c14e2..07c5ecf 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -200,7 +200,7 @@ private Object worker(Object datum, Schema fileSchema, Schema recordSchema, Type // Avro requires NULLable types to be defined as unions of some type T // and NULL. This is annoying and we're going to hide it from the user. if(AvroSerdeUtils.isNullableType(recordSchema)) { - return deserializeNullableUnion(datum, fileSchema, recordSchema, columnType); + return deserializeNullableUnion(datum, fileSchema, recordSchema); } switch(columnType.getCategory()) { @@ -296,8 +296,8 @@ private Object deserializePrimitive(Object datum, Schema fileSchema, Schema reco * Extract either a null or the correct type from a Nullable type. This is * horrible in that we rebuild the TypeInfo every time. */ - private Object deserializeNullableUnion(Object datum, Schema fileSchema, Schema recordSchema, - TypeInfo columnType) throws AvroSerdeException { + private Object deserializeNullableUnion(Object datum, Schema fileSchema, Schema recordSchema) + throws AvroSerdeException { int tag = GenericData.get().resolveUnion(recordSchema, datum); // Determine index of value Schema schema = recordSchema.getTypes().get(tag); if (schema.getType().equals(Schema.Type.NULL)) { @@ -306,8 +306,14 @@ private Object deserializeNullableUnion(Object datum, Schema fileSchema, Schema Schema currentFileSchema = null; if (fileSchema != null) { - currentFileSchema = - fileSchema.getType() == Type.UNION ? fileSchema.getTypes().get(tag) : fileSchema; + if (fileSchema.getType() == Type.UNION) { + // The fileSchema may have the null value in a different position, so + // we need to get the correct tag + tag = GenericData.get().resolveUnion(fileSchema, datum); + currentFileSchema = fileSchema.getTypes().get(tag); + } else { + currentFileSchema = fileSchema; + } } return worker(datum, currentFileSchema, schema, SchemaToTypeInfo.generateTypeInfo(schema));