diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java index d7a25b6..cba68b2 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java @@ -351,10 +351,13 @@ private Object deserializeStruct(GenericData.Record datum, Schema fileSchema, St private Object deserializeUnion(Object datum, Schema fileSchema, Schema recordSchema, UnionTypeInfo columnType) throws AvroSerdeException { - int tag = GenericData.get().resolveUnion(recordSchema, datum); // Determine index of value - Object desered = worker(datum, fileSchema == null ? null : fileSchema.getTypes().get(tag), - recordSchema.getTypes().get(tag), columnType.getAllUnionObjectTypeInfos().get(tag)); - return new StandardUnionObjectInspector.StandardUnion((byte)tag, desered); + // Calculate tags individually since the schema can evolve and can have different tags. In worst case, both schemas are same + // and we would end up doing calculations twice to get the same tag + int fsTag = GenericData.get().resolveUnion(fileSchema, datum); // Determine index of value from fileSchema + int rsTag = GenericData.get().resolveUnion(recordSchema, datum); // Determine index of value from recordSchema + Object desered = worker(datum, fileSchema == null ? null : fileSchema.getTypes().get(fsTag), + recordSchema.getTypes().get(rsTag), columnType.getAllUnionObjectTypeInfos().get(rsTag)); + return new StandardUnionObjectInspector.StandardUnion((byte)rsTag, desered); } private Object deserializeList(Object datum, Schema fileSchema, Schema recordSchema, diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java index 15416a7..1233108 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java @@ -266,17 +266,72 @@ public void canDeserializeUnions() throws SerDeException, IOException { uoi = (UnionObjectInspector)result.oi; assertEquals(0, uoi.getTag(result.unionObject)); } + + @Test + public void canDeserializeEvolvedUnions1() throws SerDeException, IOException { + Schema ws = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.UNION_SCHEMA); + Schema rs = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.UNION_SCHEMA_2); + + GenericData.Record record = new GenericData.Record(ws); + + record.put("aUnion", "this is a string"); + + ResultPair result = unionTester(ws, rs, record); + assertTrue(result.value instanceof String); + assertEquals("this is a string", result.value); + UnionObjectInspector uoi = (UnionObjectInspector)result.oi; + assertEquals(2, uoi.getTag(result.unionObject)); - private ResultPair unionTester(Schema s, GenericData.Record record) + // Now the other enum possibility + record = new GenericData.Record(ws); + record.put("aUnion", 99); + result = unionTester(ws, rs, record); + assertTrue(result.value instanceof Integer); + assertEquals(99, result.value); + uoi = (UnionObjectInspector)result.oi; + assertEquals(1, uoi.getTag(result.unionObject)); + } + + @Test + public void canDeserializeEvolvedUnions2() throws SerDeException, IOException { + Schema ws = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.UNION_SCHEMA_3); + Schema rs = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.UNION_SCHEMA_4); + + GenericData.Record record = new GenericData.Record(ws); + + record.put("aUnion", 90); + + ResultPair result = unionTester(ws, rs, record); + assertTrue(result.value instanceof Integer); + assertEquals(90, result.value); + UnionObjectInspector uoi = (UnionObjectInspector)result.oi; + assertEquals(0, uoi.getTag(result.unionObject)); + + // Now the other enum possibility + record = new GenericData.Record(ws); + record.put("aUnion", 99.9f); + result = unionTester(ws, rs, record); + assertTrue(result.value instanceof Float); + assertEquals(99.9f, result.value); + uoi = (UnionObjectInspector)result.oi; + assertEquals(1, uoi.getTag(result.unionObject)); + } + + private ResultPair unionTester(Schema ws, GenericData.Record record) throws SerDeException, IOException { - assertTrue(GENERIC_DATA.validate(s, record)); + return unionTester(ws, ws, record); + } + + private ResultPair unionTester(Schema ws, Schema rs, GenericData.Record record) + throws SerDeException, IOException { + assertTrue(GENERIC_DATA.validate(ws, record)); AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record); - AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); + AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(rs); AvroDeserializer de = new AvroDeserializer(); ArrayList row = - (ArrayList)de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s); + (ArrayList)de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, rs); assertEquals(1, row.size()); StandardStructObjectInspector oi = (StandardStructObjectInspector)aoig.getObjectInspector(); List fieldRefs = oi.getAllStructFieldRefs(); diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java index 337b44e..4c5197a 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java @@ -111,6 +111,39 @@ " }\n" + " ]\n" + "}"; + public static final String UNION_SCHEMA_2 = "{\n" + + " \"namespace\": \"test.a.rossa\",\n" + + " \"name\": \"oneUnion\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aUnion\",\n" + + " \"type\":[\"null\", \"int\", \"string\"]\n" + + " }\n" + + " ]\n" + + "}"; + public static final String UNION_SCHEMA_3 = "{\n" + + " \"namespace\": \"test.a.rossa\",\n" + + " \"name\": \"oneUnion\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aUnion\",\n" + + " \"type\":[\"float\",\"int\"]\n" + + " }\n" + + " ]\n" + + "}"; + public static final String UNION_SCHEMA_4 = "{\n" + + " \"namespace\": \"test.a.rossa\",\n" + + " \"name\": \"oneUnion\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aUnion\",\n" + + " \"type\":[\"int\",\"float\",\"long\"]\n" + + " }\n" + + " ]\n" + + "}"; public static final String ENUM_SCHEMA = "{\n" + " \"namespace\": \"clever.namespace.name.in.space\",\n" + " \"name\": \"oneEnum\",\n" +