Index: ql/src/test/results/clientpositive/udtf_json_tuple.q.out =================================================================== --- ql/src/test/results/clientpositive/udtf_json_tuple.q.out (revision 1398414) +++ ql/src/test/results/clientpositive/udtf_json_tuple.q.out (working copy) @@ -571,3 +571,51 @@ NULL 1 2 2 value2 1 +PREHOOK: query: -- Verify that json_tuple can handle new lines in JSON values + +CREATE TABLE dest1(c1 STRING) STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Verify that json_tuple can handle new lines in JSON values + +CREATE TABLE dest1(c1 STRING) STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT '{"a":"b\nc"}' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT '{"a":"b\nc"}' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +PREHOOK: query: SELECT * FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +{"a":"b +c"} +PREHOOK: query: SELECT json FROM dest1 a LATERAL VIEW json_tuple(c1, 'a') b AS json +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT json FROM dest1 a LATERAL VIEW json_tuple(c1, 'a') b AS json +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: json_t.jstring EXPRESSION [] +POSTHOOK: Lineage: json_t.key EXPRESSION [] +b +c Index: ql/src/test/results/clientpositive/udf_get_json_object.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_get_json_object.q.out (revision 1398414) +++ ql/src/test/results/clientpositive/udf_get_json_object.q.out (working copy) @@ -173,3 +173,47 @@ #### A masked pattern was here #### POSTHOOK: Lineage: dest1.c1 SIMPLE [] 1234 +PREHOOK: query: -- Verify that get_json_object can handle new lines in JSON values + +CREATE TABLE dest2(c1 STRING) STORED AS RCFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Verify that get_json_object can handle new lines in JSON values + +CREATE TABLE dest2(c1 STRING) STORED AS RCFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +PREHOOK: query: INSERT OVERWRITE TABLE dest2 SELECT '{"a":"b\nc"}' FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest2 +POSTHOOK: query: INSERT OVERWRITE TABLE dest2 SELECT '{"a":"b\nc"}' FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: dest2.c1 SIMPLE [] +PREHOOK: query: SELECT * FROM dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: dest2.c1 SIMPLE [] +{"a":"b +c"} +PREHOOK: query: SELECT get_json_object(c1, '$.a') FROM dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT get_json_object(c1, '$.a') FROM dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.c1 SIMPLE [] +POSTHOOK: Lineage: dest2.c1 SIMPLE [] +b +c Index: ql/src/test/queries/clientpositive/udf_get_json_object.q =================================================================== --- ql/src/test/queries/clientpositive/udf_get_json_object.q (revision 1398414) +++ ql/src/test/queries/clientpositive/udf_get_json_object.q (working copy) @@ -26,4 +26,15 @@ SELECT get_json_object(src_json.json, '$.zip code') FROM src_json; -SELECT get_json_object(src_json.json, '$.fb:testid') FROM src_json; \ No newline at end of file +SELECT get_json_object(src_json.json, '$.fb:testid') FROM src_json; + + +-- Verify that get_json_object can handle new lines in JSON values + +CREATE TABLE dest2(c1 STRING) STORED AS RCFILE; + +INSERT OVERWRITE TABLE dest2 SELECT '{"a":"b\nc"}' FROM src LIMIT 1; + +SELECT * FROM dest2; + +SELECT get_json_object(c1, '$.a') FROM dest2; \ No newline at end of file Index: ql/src/test/queries/clientpositive/udtf_json_tuple.q =================================================================== --- ql/src/test/queries/clientpositive/udtf_json_tuple.q (revision 1398414) +++ ql/src/test/queries/clientpositive/udtf_json_tuple.q (working copy) @@ -34,3 +34,14 @@ select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 order by f2; select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 order by f2; + + +-- Verify that json_tuple can handle new lines in JSON values + +CREATE TABLE dest1(c1 STRING) STORED AS RCFILE; + +INSERT OVERWRITE TABLE dest1 SELECT '{"a":"b\nc"}' FROM src LIMIT 1; + +SELECT * FROM dest1; + +SELECT json FROM dest1 a LATERAL VIEW json_tuple(c1, 'a') b AS json; \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (revision 1398414) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (working copy) @@ -35,6 +35,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.Text; +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonParser.Feature; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.map.type.TypeFactory; import org.codehaus.jackson.type.JavaType; @@ -51,7 +53,12 @@ private static Log LOG = LogFactory.getLog(GenericUDTFJSONTuple.class.getName()); - private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final JsonFactory JSON_FACTORY = new JsonFactory(); + static { + // Allows for unescaped ASCII control characters in JSON values + JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS); + } + private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY); private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class); int numCols; // number of output columns Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (revision 1398414) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (working copy) @@ -29,6 +29,8 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.io.Text; +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonParser.Feature; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.map.type.TypeFactory; import org.codehaus.jackson.type.JavaType; @@ -59,7 +61,12 @@ private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*"); private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]"); - private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final JsonFactory JSON_FACTORY = new JsonFactory(); + static { + // Allows for unescaped ASCII control characters in JSON values + JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS); + } + private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY); private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class); // An LRU cache using a linked hash map