LazySimpleSerDe: deserialize rows that expose String toString(byte[])

A row that is neither BytesWritable nor Text is accepted only when its class
has a public method with exactly the signature String toString(byte[]).
Anything else (no such overload, another parameter type, another return type,
a null row) is rejected with the usual "expects ..." SerDeException. A failure
inside toString(byte[]) is reported with the original exception as the cause.

Review note: context-line indentation is reconstructed (two-space style) and
the index lines are omitted because the post-image blobs changed.

diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
 
 /**
  * LazySimpleSerDe can be used to read the same data format as
@@ -323,6 +324,7 @@ public class LazySimpleSerDe implements SerDe {
     if (byteArrayRef == null) {
       byteArrayRef = new ByteArrayRef();
     }
+    String serDeString;
     if (field instanceof BytesWritable) {
       BytesWritable b = (BytesWritable) field;
       // For backward-compatibility with hadoop 0.17
@@ -332,14 +334,72 @@ public class LazySimpleSerDe implements SerDe {
       Text t = (Text) field;
       byteArrayRef.setData(t.getBytes());
       cachedLazyStruct.init(byteArrayRef, 0, t.getLength());
+    } else if ((serDeString = callToStringOnObject(field)) != null) {
+      // Encode through Text so the bytes are UTF-8, exactly as for a Text
+      // row, instead of depending on the platform default charset.
+      Text encoded = new Text(serDeString);
+      byteArrayRef.setData(encoded.getBytes());
+      cachedLazyStruct.init(byteArrayRef, 0, encoded.getLength());
     } else {
       throw new SerDeException(getClass().toString()
-          + ": expects either BytesWritable or Text object!");
+          + ": expects either BytesWritable or Text or just an object that"
+          + " supports toString(byte[]) method!");
     }
     return cachedLazyStruct;
   }
 
   /**
+   * Serializes a row object through its public
+   * {@code String toString(byte[] separators)} method, if it has one.
+   *
+   * @param o the row object; may be null
+   * @return the String returned by that method, or null when o is null or
+   *         has no public toString(byte[]) method that returns a String
+   * @throws SerDeException if the method exists but cannot be invoked, or
+   *         throws an exception itself
+   */
+  private String callToStringOnObject(Object o) throws SerDeException {
+    if (o == null) {
+      return null;
+    }
+    java.lang.reflect.Method method;
+    try {
+      method = ReflectionUtils.getClass(o).getMethod("toString", byte[].class);
+    } catch (NoSuchMethodException e) {
+      // Only toString() or an overload taking another type is available.
+      return null;
+    }
+    if (!String.class.equals(method.getReturnType())) {
+      // A non-String result cannot be used as the serialized row.
+      return null;
+    }
+    try {
+      return (String) method.invoke(o,
+          new Object[] { serdeParams.separators });
+    } catch (java.lang.reflect.InvocationTargetException e) {
+      // Report what toString(byte[]) itself threw, not the reflection
+      // wrapper around it.
+      throw toStringFailure(o, e.getCause() != null ? e.getCause() : e);
+    } catch (IllegalAccessException e) {
+      throw toStringFailure(o, e);
+    }
+  }
+
+  /**
+   * Builds the exception thrown when toString(byte[]) could not be called.
+   *
+   * @param o the object whose toString(byte[]) was being called
+   * @param cause the underlying failure
+   * @return a SerDeException that describes and wraps the cause
+   */
+  private SerDeException toStringFailure(Object o, Throwable cause) {
+    return new SerDeException(getClass().toString()
+        + ": Could not call toString on "
+        + ReflectionUtils.getClass(o).toString() + ": " + cause.toString(),
+        cause);
+  }
+
+  /**
    * Returns the ObjectInspector for the row.
    */
   public ObjectInspector getObjectInspector() throws SerDeException {
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
@@ -17,6 +17,9 @@
  */
 package org.apache.hadoop.hive.serde2.lazy;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
 import java.util.List;
 import java.util.Properties;
 
@@ -33,6 +36,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 
 /**
  * TestLazySimpleSerDe.
@@ -40,6 +44,69 @@ import org.apache.hadoop.io.Text;
  */
 public class TestLazySimpleSerDe extends TestCase {
 
+  /** Writable with no-op serialization; the tests only call deserialize(). */
+  private abstract static class NoOpWritable implements Writable {
+    @Override
+    public void readFields(DataInput in) throws IOException {
+      // Intentionally empty.
+    }
+
+    @Override
+    public void write(DataOutput out) throws IOException {
+      // Intentionally empty.
+    }
+  }
+
+  /** Declares toString(char[]), which is not the toString(byte[]) overload. */
+  private static class Test1A extends NoOpWritable {
+    @Override
+    public String toString() {
+      return "Test1A";
+    }
+
+    public String toString(char[] separators) {
+      return "Test1";
+    }
+  }
+
+  /** Declares toString(byte[]) with the wrong return type. */
+  private static class Test1B extends NoOpWritable {
+    public byte[] toString(byte[] separators) {
+      return "Test1B".getBytes();
+    }
+  }
+
+  /** Declares only the plain toString(). */
+  private static class Test1C extends NoOpWritable {
+    @Override
+    public String toString() {
+      return "Test1C";
+    }
+  }
+
+  /** Declares a well-formed toString(byte[]) that fails when called. */
+  private static class Test1D extends NoOpWritable {
+    public String toString(byte[] separators) {
+      throw new IllegalStateException("cannot serialize");
+    }
+  }
+
+  /** Serializes eight fields joined by the first separator. */
+  private static class Test2 extends NoOpWritable {
+    public String toString(byte[] separators) {
+      char sep = (char) separators[0];
+      return "123" + sep + "456" + sep + "789" + sep + "1000" + sep + "6.4"
+          + sep + "hive and hadoop" + sep + "NULL" + sep + "NULL";
+    }
+
+    public Object[] getExpectedFieldsData() {
+      return new Object[] {new ByteWritable((byte) 123),
+          new ShortWritable((short) 456), new IntWritable(789),
+          new LongWritable(1000), new DoubleWritable(6.4),
+          new Text("hive and hadoop"), null, null};
+    }
+  }
+
   /**
    * Test the LazySimpleSerDe class.
    */
@@ -189,4 +256,95 @@ public class TestLazySimpleSerDe extends TestCase {
     }
   }
 
+  /**
+   * Asserts that deserializing the given row fails with a SerDeException
+   * carrying exactly the given message.
+   */
+  private static void assertDeserializeFails(LazySimpleSerDe serDe,
+      Writable row, String expectedMessage) {
+    try {
+      serDe.deserialize(row);
+      fail("Should raise an org.apache.hadoop.hive.serde2.SerDeException");
+    } catch (SerDeException e) {
+      assertEquals(expectedMessage, e.getMessage());
+    }
+  }
+
+  /**
+   * Test that rows without a usable toString(byte[]) method are rejected
+   */
+  public void testLazySimpleSerDeToStringNoMethod() throws Throwable {
+    try {
+      // Create the SerDe
+      LazySimpleSerDe serDe = new LazySimpleSerDe();
+      Configuration conf = new Configuration();
+      Properties tbl = createProperties();
+      serDe.initialize(conf, tbl);
+
+      String unsupported = "class"
+          + " org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe: expects"
+          + " either BytesWritable or Text or just an object that supports"
+          + " toString(byte[]) method!";
+
+      // toString(char[]) has the wrong parameter type
+      assertDeserializeFails(serDe, new Test1A(), unsupported);
+
+      // toString(byte[]) has the wrong return type
+      assertDeserializeFails(serDe, new Test1B(), unsupported);
+
+      // Only the plain toString() is declared
+      assertDeserializeFails(serDe, new Test1C(), unsupported);
+
+      // toString(byte[]) has the right signature but throws
+      assertDeserializeFails(serDe, new Test1D(), "class"
+          + " org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe: Could not"
+          + " call toString on class org.apache.hadoop.hive.serde2.lazy."
+          + "TestLazySimpleSerDe$Test1D:"
+          + " java.lang.IllegalStateException: cannot serialize");
+
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  /**
+   * Test the LazySimpleSerDe with an arbitrary Writable with toString(byte[])
+   * method
+   */
+  public void testLazySimpleSerDeToString() throws Throwable {
+    try {
+      // Create the SerDe
+      LazySimpleSerDe serDe = new LazySimpleSerDe();
+      Configuration conf = new Configuration();
+      Properties tbl = createProperties();
+      tbl.setProperty(Constants.SERIALIZATION_FORMAT, "58");
+      serDe.initialize(conf, tbl);
+
+      // Data
+      Test2 t = new Test2();
+      Object[] expectedFieldsData = t.getExpectedFieldsData();
+
+      // Test
+      StructObjectInspector oi = (StructObjectInspector) serDe
+          .getObjectInspector();
+      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
+      assertEquals(8, fieldRefs.size());
+
+      // Deserialize only
+      Object row = serDe.deserialize(t);
+      for (int i = 0; i < fieldRefs.size(); i++) {
+        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
+        if (fieldData != null) {
+          fieldData = ((LazyPrimitive) fieldData).getWritableObject();
+        }
+        assertEquals("Field " + i, expectedFieldsData[i], fieldData);
+      }
+
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
 }