diff --git contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java index 9a162d5..45ef6c6 100644 --- contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java +++ contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java @@ -25,6 +25,7 @@ import java.util.Properties; import java.util.regex.Pattern; +import com.google.common.base.Charsets; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.*; @@ -63,7 +64,7 @@ serdeConstants.SERIALIZATION_ENCODING, LazySerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS, LazySerDeParameters.SERIALIZATION_EXTEND_ADDITIONAL_NESTING_LEVELS}) -public class MultiDelimitSerDe extends AbstractSerDe { +public class MultiDelimitSerDe extends AbstractEncodingAwareSerDe { private static final byte[] DEFAULT_SEPARATORS = {(byte) 1, (byte) 2, (byte) 3}; // Due to HIVE-6404, define our own constant @@ -121,6 +122,7 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException assert serdeParams.getColumnNames().size() == serdeParams.getColumnTypes().size(); numColumns = serdeParams.getColumnNames().size(); + } @@ -134,8 +136,8 @@ public ObjectInspector getObjectInspector() throws SerDeException { return Text.class; } - @Override - public Object deserialize(Writable blob) throws SerDeException { + + @Override public Object doDeserialize(Writable blob) throws SerDeException { if (byteArrayRef == null) { byteArrayRef = new ByteArrayRef(); } @@ -145,9 +147,9 @@ public Object deserialize(Writable blob) throws SerDeException { String rowStr; if (blob instanceof BytesWritable) { BytesWritable b = (BytesWritable) blob; - rowStr = new String(b.getBytes()); + rowStr = new String(b.getBytes()); } else if (blob instanceof Text) { - Text rowText = (Text) blob; + Text rowText = (Text) blob ; rowStr = rowText.toString(); } else { throw new SerDeException(getClass() + ": expects either BytesWritable or Text object!"); @@ -159,8 +161,8 @@ public Object deserialize(Writable blob) throws SerDeException { return cachedLazyStruct; } - @Override - public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { + @Override public Writable doSerialize(Object obj, ObjectInspector objInspector) + throws SerDeException { StructObjectInspector soi = (StructObjectInspector) objInspector; List fields = soi.getAllStructFieldRefs(); List list = soi.getStructFieldsDataAsList(obj); @@ -286,6 +288,16 @@ private static void serializeNoEncode(ByteStream.Output out, Object obj, throw new RuntimeException("Unknown category type: "+ objInspector.getCategory()); } + protected Text transformFromUTF8(Writable blob) { + Text text = (Text)blob; + return SerDeUtils.transformTextFromUTF8(text, this.charset); + } + + protected Text transformToUTF8(Writable blob) { + Text text = (Text) blob; + return SerDeUtils.transformTextToUTF8(text, this.charset); + } + @Override public SerDeStats getSerDeStats() { // no support for statistics