diff --git a/data/files/encoding_iso-8859-1.txt b/data/files/encoding_iso-8859-1.txt new file mode 100644 index 0000000000000000000000000000000000000000..7df6dc29fb3812202ff1d2188d932258531c140e --- /dev/null +++ b/data/files/encoding_iso-8859-1.txt @@ -0,0 +1,4 @@ +Müller,Thomas +Jørgensen,Jørgen +Peña,Andrés +Nåm,Fæk diff --git a/ql/src/test/queries/clientpositive/encoding_nonutf8.q b/ql/src/test/queries/clientpositive/encoding_nonutf8.q new file mode 100644 index 0000000000000000000000000000000000000000..f4167608992580b0c62eacd892854cb044007505 --- /dev/null +++ b/ql/src/test/queries/clientpositive/encoding_nonutf8.q @@ -0,0 +1,7 @@ +drop table if exists encodelat1; +create table encodelat1 (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1'); +load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1; +select * from encodelat1; + diff --git a/ql/src/test/results/clientpositive/encoding_nonutf8.q.out b/ql/src/test/results/clientpositive/encoding_nonutf8.q.out new file mode 100644 index 0000000000000000000000000000000000000000..63c8b4591dced0f25f389778a354313ffed9b9ae --- /dev/null +++ b/ql/src/test/results/clientpositive/encoding_nonutf8.q.out @@ -0,0 +1,36 @@ +PREHOOK: query: drop table if exists encodelat1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists encodelat1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table encodelat1 (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@encodelat1 +POSTHOOK: query: create table encodelat1 (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@encodelat1 +PREHOOK: query: load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@encodelat1 +POSTHOOK: query: load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@encodelat1 +PREHOOK: query: select * from encodelat1 +PREHOOK: type: QUERY +PREHOOK: Input: default@encodelat1 +#### A masked pattern was here #### +POSTHOOK: query: select * from encodelat1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encodelat1 +#### A masked pattern was here #### +Müller,Thomas +Jørgensen,Jørgen +Peña,Andrés +NÃ¥m,Fæk diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index 8dada5a324545bf80e26c5766545ff735f7fce40..40ede1a4507d10d199960f64ebb073d364df3d08 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -551,7 +551,7 @@ private SerDeUtils() { } public static Text transformTextToUTF8(Text text, Charset previousCharset) { - return new Text(new String(text.getBytes(), previousCharset)); + return new Text(new String(text.getBytes(), 0, text.getLength(), previousCharset)); } public static Text transformTextFromUTF8(Text text, Charset targetCharset) {