diff --git data/files/tsformat.json data/files/tsformat.json new file mode 100644 index 0000000..247efae --- /dev/null +++ data/files/tsformat.json @@ -0,0 +1,2 @@ +{"c1": 123, "c2": "abc", "c3": "2001-02-03T12:34:56"} +{"c1": 456, "c2": "xyz", "c3": "1906-04-18T05:12:00"} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java index 9c87aa3..e06c243 100644 --- hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java @@ -70,6 +70,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.apache.hive.common.util.HiveStringUtils; +import org.apache.hive.common.util.TimestampParser; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type; @@ -91,6 +93,7 @@ private JsonFactory jsonFactory = null; private HCatRecordObjectInspector cachedObjectInspector; + private TimestampParser tsParser; @Override public void initialize(Configuration conf, Properties tbl) @@ -138,6 +141,8 @@ public void initialize(Configuration conf, Properties tbl) } jsonFactory = new JsonFactory(); + tsParser = new TimestampParser( + HiveStringUtils.splitAndUnEscape(tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS))); } /** @@ -300,7 +305,7 @@ private Object extractCurrentField(JsonParser p, HCatFieldSchema hcatFieldSchema val = (valueToken == JsonToken.VALUE_NULL) ? null : Date.valueOf(p.getText()); break; case TIMESTAMP: - val = (valueToken == JsonToken.VALUE_NULL) ? null : Timestamp.valueOf(p.getText()); + val = (valueToken == JsonToken.VALUE_NULL) ? null : tsParser.parseTimestamp(p.getText()); break; case DECIMAL: val = (valueToken == JsonToken.VALUE_NULL) ? null : HiveDecimal.create(p.getText()); diff --git ql/src/test/queries/clientpositive/json_serde_tsformat.q ql/src/test/queries/clientpositive/json_serde_tsformat.q new file mode 100644 index 0000000..c00450c --- /dev/null +++ ql/src/test/queries/clientpositive/json_serde_tsformat.q @@ -0,0 +1,12 @@ + +add jar ${system:maven.local.repository}/org/apache/hive/hcatalog/hive-hcatalog-core/${system:hive.version}/hive-hcatalog-core-${system:hive.version}.jar; + +CREATE TABLE t1 (c1 int, c2 string, c3 timestamp) +ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' +WITH SERDEPROPERTIES ('timestamp.formats'='yyyy-MM-dd\'T\'HH:mm:ss') +; +LOAD DATA LOCAL INPATH "../../data/files/tsformat.json" INTO TABLE t1; +select a.c1, a.c2, b.c3 +from t1 a join t1 b on a.c1 = b.c1; + +drop table t1; diff --git ql/src/test/results/clientpositive/json_serde_tsformat.q.out ql/src/test/results/clientpositive/json_serde_tsformat.q.out new file mode 100644 index 0000000..eb5eeb1 --- /dev/null +++ ql/src/test/results/clientpositive/json_serde_tsformat.q.out @@ -0,0 +1,40 @@ +PREHOOK: query: CREATE TABLE t1 (c1 int, c2 string, c3 timestamp) +ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' +WITH SERDEPROPERTIES ('timestamp.formats'='yyyy-MM-dd\'T\'HH:mm:ss') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: CREATE TABLE t1 (c1 int, c2 string, c3 timestamp) +ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' +WITH SERDEPROPERTIES ('timestamp.formats'='yyyy-MM-dd\'T\'HH:mm:ss') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/tsformat.json" INTO TABLE t1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/tsformat.json" INTO TABLE t1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: select a.c1, a.c2, b.c3 +from t1 a join t1 b on a.c1 = b.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select a.c1, a.c2, b.c3 +from t1 a join t1 b on a.c1 = b.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +123 abc 2001-02-03 12:34:56 +456 xyz 1906-04-18 05:12:00 +PREHOOK: query: drop table t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: drop table t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1