# Reviewer summary (free text ahead of the first "Index:" line; not part of any hunk).
#
# TestAvroSerializer.java: renames canSerializeNullableTypes to
#   canSerializeNullablePrimitiveTypes and adds round-trip tests for nullable
#   records, lists, maps, fixed and bytes fields, plus arrays, records and maps
#   whose elements/values are nullable. canSerializeMapsWithNullableComplexValues
#   is @Ignore'd with the reason "Currently fails because of HIVE-3525".
# AvroSerializer.java: serialize() now swaps a nullable schema for the schema
#   returned by AvroSerdeUtils.getOtherTypeFromNullableType, returns null for
#   null field data, and passes STRING_SCHEMA (instead of null) as the schema
#   for map keys.
#
# NOTE(review): this patch had lost its line breaks and its generic type
#   arguments. Type arguments were restored on the added test lines, where the
#   usage determines them. Context and '-' lines of AvroSerializer.java are left
#   as found apart from whitespace; they may also be missing generics, and the
#   indentation is reconstructed -- verify against revision 1394121 (or apply
#   with whitespace-insensitive matching) before committing.
#
Index: serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java
===================================================================
--- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java	(revision 1394121)
+++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java	(working copy)
@@ -25,17 +25,20 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.Writable;
 import org.junit.Test;
+import org.junit.Ignore;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Hashtable;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
@@ -197,7 +200,7 @@
   }
 
   @Test
-  public void canSerializeNullableTypes() throws SerDeException, IOException {
+  public void canSerializeNullablePrimitiveTypes() throws SerDeException, IOException {
     String field = "{ \"name\":\"nullableint\", \"type\":[\"int\", \"null\"] }";
     GenericRecord r = serializeAndDeserialize(field, "nullableint", 42);
     assertEquals(42, r.get("nullableint"));
@@ -207,6 +210,176 @@
   }
 
   @Test
+  public void canSerializeNullableRecords() throws SerDeException, IOException {
+    String field = "{ \"name\":\"nullableStruct\", \"type\": [\"null\", {\"type\":\"record\", " +
+        "\"name\":\"struct1_name\", \"fields\": [\n" +
+        "{ \"name\":\"sInt\", \"type\":\"int\" }, " +
+        "{ \"name\":\"sBoolean\", \"type\":\"boolean\" }, " +
+        "{ \"name\":\"sString\", \"type\":\"string\" } ] }] }";
+
+    Schema s = buildSchema(field);
+    Schema nullable = s.getField("nullableStruct").schema();
+    assertTrue(AvroSerdeUtils.isNullableType(nullable));
+    GenericData.Record innerRecord =
+        new GenericData.Record(AvroSerdeUtils.getOtherTypeFromNullableType(nullable));
+
+    innerRecord.put("sInt", 77);
+    innerRecord.put("sBoolean", false);
+    innerRecord.put("sString", "tedious");
+
+    GenericRecord r = serializeAndDeserialize(field, "nullableStruct", innerRecord);
+    Object result = r.get("nullableStruct");
+    assertNotSame(innerRecord, result);
+    assertEquals(innerRecord, result);
+
+    r = serializeAndDeserialize(field, "nullableStruct", null);
+    assertNull(r.get("nullableStruct"));
+  }
+
+  @Test
+  public void canSerializeNullableLists() throws SerDeException, IOException {
+    List<Integer> intList = new ArrayList<Integer>();
+    Collections.addAll(intList, 1,2, 3);
+    String field = "{ \"name\":\"nullableList\", \"type\": [\"null\", " +
+        "{\"type\":\"array\", \"items\":\"int\"}] }";
+    GenericRecord r = serializeAndDeserialize(field, "nullableList", intList);
+    Object result = r.get("nullableList");
+    assertNotSame(intList, result);
+    assertEquals(intList, result);
+
+    r = serializeAndDeserialize(field, "nullableList", null);
+    assertNull(r.get("nullableList"));
+  }
+
+  @Test
+  public void canSerializeNullableMaps() throws SerDeException, IOException {
+    String field = "{ \"name\":\"nullableMap\", \"type\": [\"null\", " +
+        "{\"type\":\"map\", \"values\":\"boolean\"}] }";
+
+    Map<String, Boolean> m = new Hashtable<String, Boolean>();
+    m.put("yes", true);
+    m.put("no", false);
+    GenericRecord r = serializeAndDeserialize(field, "nullableMap", m);
+
+    Object result = r.get("nullableMap");
+    assertNotSame(m, result);
+    assertEquals(m, result);
+
+    r = serializeAndDeserialize(field, "nullableMap", null);
+    assertNull(r.get("nullableMap"));
+  }
+
+  @Test
+  public void canSerializeNullableFixed() throws SerDeException, IOException {
+    String field = "{ \"name\":\"nullableFixed\", \"type\": [\"null\", " +
+        "{\"type\":\"fixed\", \"name\":\"threebytes\", \"size\":3}] }";
+    Schema s = buildSchema(field);
+    Schema nullable = s.getField("nullableFixed").schema();
+    assertTrue(AvroSerdeUtils.isNullableType(nullable));
+
+    GenericData.Fixed fixed = new GenericData.Fixed(
+        AvroSerdeUtils.getOtherTypeFromNullableType(nullable), "k9@".getBytes());
+    GenericRecord r = serializeAndDeserialize(field, "nullableFixed", fixed);
+
+    GenericData.Fixed result = (GenericData.Fixed) r.get("nullableFixed");
+    assertNotSame(fixed, result);
+    assertArrayEquals(fixed.bytes(), result.bytes());
+
+    r = serializeAndDeserialize(field, "nullableFixed", null);
+    assertNull(r.get("nullableFixed"));
+  }
+
+  @Test
+  public void canSerializeNullableBytes() throws SerDeException, IOException {
+    String field = "{ \"name\":\"nullableBytes\", \"type\":[\"null\", \"bytes\"] }";
+    ByteBuffer bb = ByteBuffer.wrap("easy as one two three".getBytes());
+    bb.rewind();
+    GenericRecord r = serializeAndDeserialize(field, "nullableBytes", bb);
+
+    Object result = r.get("nullableBytes");
+    assertNotSame(bb, result);
+    assertEquals(bb, result);
+
+    r = serializeAndDeserialize(field, "nullableBytes", null);
+    assertNull(r.get("nullableBytes"));
+  }
+
+  @Test
+  public void canSerializeArraysWithNullablePrimitiveElements() throws SerDeException, IOException {
+    final String field = "{ \"name\":\"listWithNulls\", \"type\": " +
+        "{\"type\":\"array\", \"items\": [\"null\", \"int\"]} }";
+    List<Integer> intList = new ArrayList<Integer>();
+    Collections.addAll(intList, 1,2, null, 3);
+    GenericRecord r = serializeAndDeserialize(field, "listWithNulls", intList);
+    Object result = r.get("listWithNulls");
+    assertNotSame(intList, result);
+    assertEquals(intList, result);
+  }
+
+  @Test
+  public void canSerializeArraysWithNullableComplexElements() throws SerDeException, IOException {
+    final String field = "{ \"name\":\"listOfNullableLists\", \"type\": " +
+        "{\"type\":\"array\", \"items\": [\"null\", " +
+        "{\"type\": \"array\", \"items\": \"int\"}]} }";
+    List<List<Integer>> intListList = new ArrayList<List<Integer>>();
+    List<Integer> intList = new ArrayList<Integer>();
+    Collections.addAll(intList, 1,2,3);
+    Collections.addAll(intListList, intList, null);
+    GenericRecord r = serializeAndDeserialize(field, "listOfNullableLists", intListList);
+    Object result = r.get("listOfNullableLists");
+    assertNotSame(intListList, result);
+    assertEquals(intListList, result);
+  }
+
+  @Test
+  public void canSerializeRecordsWithNullableComplexElements() throws SerDeException, IOException {
+    String field = "{ \"name\":\"struct1\", \"type\":{\"type\":\"record\", " +
+        "\"name\":\"struct1_name\", \"fields\": [\n" +
+        "{ \"name\":\"sInt\", \"type\":\"int\" }, { \"name\"" +
+        ":\"sBoolean\", \"type\":\"boolean\" }, { \"name\":\"nullableList\", \"type\":[\"null\", " +
+        "{ \"type\":\"array\", \"items\":\"int\"}] } ] } }";
+
+    Schema s = buildSchema(field);
+    GenericData.Record innerRecord = new GenericData.Record(s.getField("struct1").schema());
+
+    innerRecord.put("sInt", 77);
+    innerRecord.put("sBoolean", false);
+    List<Integer> intList = new ArrayList<Integer>();
+    Collections.addAll(intList, 1,2,3);
+    innerRecord.put("nullableList", intList);
+
+    GenericRecord r = serializeAndDeserialize(field, "struct1", innerRecord);
+    Object result = r.get("struct1");
+    assertNotSame(innerRecord, result);
+    assertEquals(innerRecord, result);
+
+    innerRecord.put("nullableList", null);
+    r = serializeAndDeserialize(field, "struct1", innerRecord);
+    result = r.get("struct1");
+    assertNotSame(innerRecord, result);
+    assertEquals(innerRecord, result);
+  }
+
+  @Ignore("Currently fails because of HIVE-3525")
+  @Test
+  public void canSerializeMapsWithNullableComplexValues() throws SerDeException, IOException {
+    String field = "{ \"name\":\"mapWithNullableLists\", \"type\": " +
+        "{\"type\":\"map\", \"values\": [\"null\", " +
+        "{\"type\": \"array\", \"items\": \"int\"}]} }";
+
+    Map<String, List<Integer>> m = new HashMap<String, List<Integer>>();
+    List<Integer> intList = new ArrayList<Integer>();
+    Collections.addAll(intList, 1,2,3);
+    m.put("list", intList);
+    m.put("null", null);
+    GenericRecord r = serializeAndDeserialize(field, "mapWithNullableLists", m);
+
+    Object result = r.get("mapWithNullableLists");
+    assertNotSame(m, result);
+    assertEquals(m, result);
+  }
+
+  @Test
   public void canSerializeBytes() throws SerDeException, IOException {
     String field = "{ \"name\":\"bytes1\", \"type\":\"bytes\" }";
     ByteBuffer bb = ByteBuffer.wrap("easy as one two three".getBytes());
Index: serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java	(revision 1394121)
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java	(working copy)
@@ -49,6 +49,9 @@
 class AvroSerializer {
   private static final Log LOG = LogFactory.getLog(AvroSerializer.class);
 
+  // Schema handed to serialize() for map keys, so that a real string schema is
+  // supplied instead of null. Created once and shared by all instances.
+  private static final Schema STRING_SCHEMA = Schema.create(Schema.Type.STRING);
   AvroGenericRecordWritable cache = new AvroGenericRecordWritable();
 
   // Hive is pretty simple (read: stupid) in writing out values via the serializer.
@@ -90,6 +93,15 @@
   }
 
   private Object serialize(TypeInfo typeInfo, ObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
+    // For a nullable schema, continue with the schema returned by
+    // getOtherTypeFromNullableType so the per-category code below works on it.
+    if(AvroSerdeUtils.isNullableType(schema)) {
+      schema = AvroSerdeUtils.getOtherTypeFromNullableType(schema);
+    }
+    // Null field data is returned as null before any category-specific handling.
+    if(null == structFieldData) {
+      return null;
+    }
     switch(typeInfo.getCategory()) {
     case PRIMITIVE:
       assert fieldOI instanceof PrimitiveObjectInspector;
@@ -232,7 +244,7 @@
     Map deserialized = new Hashtable(fieldOI.getMapSize(structFieldData));
 
     for (Map.Entry entry : map.entrySet()) {
-      deserialized.put(serialize(mapKeyTypeInfo, mapKeyObjectInspector, entry.getKey(), null), // This works, but is a bit fragile. Construct a single String schema?
+      deserialized.put(serialize(mapKeyTypeInfo, mapKeyObjectInspector, entry.getKey(), STRING_SCHEMA),
                        serialize(mapValueTypeInfo, mapValueObjectInspector, entry.getValue(), valueType));
     }
 