diff --git contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java index 9a162d5..61490b2 100644 --- contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java +++ contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java @@ -20,11 +20,13 @@ package org.apache.hadoop.hive.contrib.serde2; import java.io.IOException; +import java.nio.charset.Charset; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.regex.Pattern; +import com.google.common.base.Charsets; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.*; @@ -91,6 +93,8 @@ // The Writable to return in serialize private final Text serializeCache = new Text(); + protected Charset charset; + @Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { // get the SerDe parameters @@ -121,6 +125,8 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException assert serdeParams.getColumnNames().size() == serdeParams.getColumnTypes().size(); numColumns = serdeParams.getColumnNames().size(); + + charset = Charset.forName(tbl.getProperty(serdeConstants.SERIALIZATION_ENCODING, "UTF-8")); } @@ -148,6 +154,9 @@ public Object deserialize(Writable blob) throws SerDeException { rowStr = new String(b.getBytes()); } else if (blob instanceof Text) { Text rowText = (Text) blob; + if (!this.charset.equals(Charsets.UTF_8)) { + rowText = transformToUTF8((Text) blob); + } rowStr = rowText.toString(); } else { throw new SerDeException(getClass() + ": expects either BytesWritable or Text object!"); @@ -191,6 +200,9 @@ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDe } serializeCache.set(serializeStream.getData(), 0, serializeStream.getLength()); + if (!this.charset.equals(Charsets.UTF_8)) { + return transformFromUTF8(serializeCache); + } return serializeCache; } @@ -286,6 +298,16 @@ private static void serializeNoEncode(ByteStream.Output out, Object obj, throw new RuntimeException("Unknown category type: "+ objInspector.getCategory()); } + protected Text transformFromUTF8(Writable blob) { + Text text = (Text)blob; + return SerDeUtils.transformTextFromUTF8(text, this.charset); + } + + protected Text transformToUTF8(Text blob) { + Text text = (Text)blob; + return SerDeUtils.transformTextToUTF8(text, this.charset); + } + @Override public SerDeStats getSerDeStats() { // no support for statistics