# Patch summary: lets the escaped text serialization in LazyUtils round-trip carriage returns and newlines.
#   * writeEscaped(): when `escaped` is set, a CR or LF byte is emitted as escapeChar followed by the
#     letter 'r' or 'n' instead of being written raw.
#   * copyAndEscapeStringDataToText(): escapeChar followed by 'r' or 'n' is decoded back to CR / LF;
#     any other escaped byte is still copied through unchanged.
#   * Adds the select_newlinechars.q test and its expected output: count(*) over a lateral view whose
#     select list contains the literal "a\rb\nc" must return 1.
# Also fixed in this patch: the guard in front of out.write(escapeChar) compared the absolute index `i`
# with the length `len`. The loop runs from `start` to `end = start + len`, so with start > 0 the escape
# byte (and the new CR/LF handling nested under it) was skipped for every position >= len. The guard is
# now `i < end`, which is false only on the terminating i == end iteration.
# NOTE(review): a literal 'r' or 'n' that is itself marked in needsEscape would be written as
# escapeChar + letter and then decoded as CR / LF - confirm no separator/escape char can be 'r' or 'n'.
diff --git a/ql/src/test/queries/clientpositive/select_newlinechars.q b/ql/src/test/queries/clientpositive/select_newlinechars.q new file mode 100644 index 0000000000000000000000000000000000000000..9c010de1035829e5af95d3852c2d6fda1361277f --- /dev/null +++ b/ql/src/test/queries/clientpositive/select_newlinechars.q @@ -0,0 +1,4 @@ +drop table if exists strsim; +create table strsim (strcol string); +insert overwrite table strsim select "book" from src limit 1; +select count (*) from (select "a\rb\nc", narray from strsim LATERAL VIEW explode(array(1)) C AS narray) lav; diff --git a/ql/src/test/results/clientpositive/select_newlinechars.q.out b/ql/src/test/results/clientpositive/select_newlinechars.q.out new file mode 100644 index 0000000000000000000000000000000000000000..d79bd9467c85edbf7323f701fdc6b5072d89c61f --- /dev/null +++ b/ql/src/test/results/clientpositive/select_newlinechars.q.out @@ -0,0 +1,30 @@ +PREHOOK: query: drop table if exists strsim +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists strsim +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table strsim (strcol string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@strsim +POSTHOOK: query: create table strsim (strcol string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@strsim +PREHOOK: query: insert overwrite table strsim select "book" from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@strsim +POSTHOOK: query: insert overwrite table strsim select "book" from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@strsim +POSTHOOK: Lineage: strsim.strcol SIMPLE [] +PREHOOK: query: select count (*) from (select "a\rb\nc", narray from strsim LATERAL VIEW explode(array(1)) C AS narray) lav +PREHOOK: type: QUERY +PREHOOK: Input: default@strsim +#### A masked pattern was here #### +POSTHOOK: query: select count (*) from (select 
"a\rb\nc", narray from strsim LATERAL VIEW explode(array(1)) C AS narray) lav +POSTHOOK: type: QUERY +POSTHOOK: Input: default@strsim +#### A masked pattern was here #### +1 diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index 3943508685158349f3ff68cfe0543628c613047e..ddbdfc65b204aac49db743566e3674726ff5854c 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -151,13 +151,20 @@ public static void writeEscaped(OutputStream out, byte[] bytes, int start, if (escaped) { int end = start + len; for (int i = start; i <= end; i++) { - if (i == end || (bytes[i] >= 0 && needsEscape[bytes[i]])) { + if (i == end || (bytes[i] >= 0 && needsEscape[bytes[i]]) || bytes[i] == (byte)'\r' || bytes[i] == (byte)'\n') { if (i > start) { out.write(bytes, start, i - start); } start = i; - if (i < len) { + if (i < end) { out.write(escapeChar); + if(bytes[i] == (byte)'\r') { + out.write((byte)('r')); + start = i + 1; + } else if (bytes[i] == (byte)'\n'){ + out.write((byte)('n')); + start = i + 1; + } // the current char will be written out later. } } @@ -443,7 +450,14 @@ public static void copyAndEscapeStringDataToText(byte[] inputBytes, int start, i } else { // get the next byte i++; - outputBytes[k++] = inputBytes[start + i]; + if ((inputBytes[start + i]) == 'r') { + outputBytes[k++] = '\r'; + } else if ((inputBytes[start + i]) == 'n') { + outputBytes[k++] = '\n'; + } else { + outputBytes[k++] = inputBytes[start + i]; + } + } } assert (k == outputLength);