From 5a7309563f2fe68b575e18350d2dc5816ce51a97 Mon Sep 17 00:00:00 2001
From: Ben Roling
Date: Mon, 28 Jul 2014 11:08:20 -0500
Subject: [PATCH] HIVE-5865: Refer to map key type as CharSequence instead of Utf8

---
 .../hadoop/hive/serde2/avro/AvroDeserializer.java  |  5 +-
 .../hive/serde2/avro/TestAvroDeserializer.java     | 61 ++++++++++++++++++++++
 2 files changed, 63 insertions(+), 3 deletions(-)

diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
index 29262ba..50c14e2 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
@@ -40,7 +40,6 @@
 import org.apache.avro.io.BinaryEncoder;
 import org.apache.avro.io.DecoderFactory;
 import org.apache.avro.io.EncoderFactory;
-import org.apache.avro.util.Utf8;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.type.HiveChar;
@@ -370,10 +369,10 @@ private Object deserializeMap(Object datum, Schema fileSchema, Schema mapSchema,
     // Avro only allows maps with Strings for keys, so we only have to worry
     // about deserializing the values
     Map map = new HashMap();
-    Map<Utf8, Object> mapDatum = (Map<Utf8, Object>)datum;
+    Map<CharSequence, Object> mapDatum = (Map<CharSequence, Object>)datum;
     Schema valueSchema = mapSchema.getValueType();
     TypeInfo valueTypeInfo = columnType.getMapValueTypeInfo();
-    for (Utf8 key : mapDatum.keySet()) {
+    for (CharSequence key : mapDatum.keySet()) {
       Object value = mapDatum.get(key);
       map.put(key.toString(), worker(value, fileSchema == null ? null : fileSchema.getValueType(),
           valueSchema, valueTypeInfo));
diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
index 3a33239..15416a7 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
@@ -475,6 +475,67 @@ public void canDeserializeMapWithNullablePrimitiveValues() throws SerDeException
     assertTrue(theMap2.containsKey("mu"));
     assertEquals(null, theMap2.get("mu"));
   }
+
+  @Test
+  public void canDeserializeMapsWithJavaLangStringKeys() throws IOException, SerDeException {
+    // Ensures maps can be deserialized when avro.java.string=String.
+    // See http://stackoverflow.com/a/19868919/312944 for why that might be used.
+    String schemaString = "{\n" +
+        " \"namespace\": \"testing\",\n" +
+        " \"name\": \"oneMap\",\n" +
+        " \"type\": \"record\",\n" +
+        " \"fields\": [\n" +
+        " {\n" +
+        " \"name\":\"aMap\",\n" +
+        " \"type\":{\"type\":\"map\",\n" +
+        " \"avro.java.string\":\"String\",\n" +
+        " \"values\":\"long\"}\n" +
+        "\t}\n" +
+        " ]\n" +
+        "}";
+    Schema s = AvroSerdeUtils.getSchemaFor(schemaString);
+    GenericData.Record record = new GenericData.Record(s);
+
+    Map m = new Hashtable();
+    m.put("one", 1l);
+    m.put("two", 2l);
+    m.put("three", 3l);
+
+    record.put("aMap", m);
+    assertTrue(GENERIC_DATA.validate(s, record));
+    System.out.println("record = " + record);
+
+    AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
+
+    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
+
+    AvroDeserializer de = new AvroDeserializer();
+
+    ArrayList row = (ArrayList)de.deserialize(aoig.getColumnNames(),
+        aoig.getColumnTypes(), garw, s);
+    assertEquals(1, row.size());
+    Object theMapObject = row.get(0);
+    assertTrue(theMapObject instanceof Map);
+    Map theMap = (Map)theMapObject;
+
+    // Verify the raw object that's been created
+    assertEquals(1l, theMap.get("one"));
+    assertEquals(2l, theMap.get("two"));
+    assertEquals(3l, theMap.get("three"));
+
+    // Verify that the provided object inspector can pull out these same values
+    StandardStructObjectInspector oi =
+        (StandardStructObjectInspector)aoig.getObjectInspector();
+
+    List z = oi.getStructFieldsDataAsList(row);
+    assertEquals(1, z.size());
+    StructField fieldRef = oi.getStructFieldRef("amap");
+
+    Map theMap2 = (Map)oi.getStructFieldData(row, fieldRef);
+    assertEquals(1l, theMap2.get("one"));
+    assertEquals(2l, theMap2.get("two"));
+    assertEquals(3l, theMap2.get("three"));
+  }
 
   private void verifyNullableType(GenericData.Record record, Schema s, String fieldName,
                                   String expected) throws SerDeException, IOException {
-- 
1.8.5.2 (Apple Git-48)