diff --git data/files/timestamp_data.txt data/files/timestamp_data.txt new file mode 100644 index 0000000..edc22ea --- /dev/null +++ data/files/timestamp_data.txt @@ -0,0 +1,13 @@ +1366672285 +1366672333.594127116 +2013-04-22 16:11:25.0 +1 +NULL +2013-04-22 16:12:13.594127876 +67 +1366672333.999999999 +null +nULL +1366672333.99999999999 +randomtext +234randomtext diff --git ql/src/test/queries/clientpositive/timestamp_4.q ql/src/test/queries/clientpositive/timestamp_4.q new file mode 100644 index 0000000..311c6fe --- /dev/null +++ ql/src/test/queries/clientpositive/timestamp_4.q @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS timestamp_4; + +CREATE TABLE timestamp_4 (t TIMESTAMP); +ALTER TABLE timestamp_4 SET SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; + +-- timestamp_data.txt contains timestamps in below supported formats +-- 1. Integer: UNIX epoch seconds +-- 2. Floating point: UNIX epoch seconds plus nanoseconds +-- 3. Strings: JDBC compliant java.sql.Timestamp format +-- "YYYY-MM-DD HH:MM:SS.fffffffff" (9 decimal place precision) +LOAD DATA LOCAL INPATH '../../data/files/timestamp_data.txt' INTO TABLE timestamp_4; + +SELECT t FROM timestamp_4; +SELECT CAST(t AS TINYINT) FROM timestamp_4; +SELECT CAST(t AS SMALLINT) FROM timestamp_4; +SELECT CAST(t AS INT) FROM timestamp_4; +SELECT CAST(t AS BIGINT) FROM timestamp_4; +SELECT CAST(t AS FLOAT) FROM timestamp_4; +SELECT CAST(t AS DOUBLE) FROM timestamp_4; +SELECT CAST(t AS STRING) FROM timestamp_4; + +SELECT * FROM timestamp_4 ORDER BY t; + +SELECT * FROM timestamp_4 WHERE t = '2013-04-22 16:11:25'; + +SELECT * FROM timestamp_4 WHERE t > '2013-04-22 16:11:25'; + +SELECT * FROM timestamp_4 WHERE t < '2013-04-22 16:11:25'; + +SELECT * FROM timestamp_4 WHERE t <> '2013-04-22 16:11:25'; + +DROP TABLE timestamp_4; diff --git ql/src/test/queries/clientpositive/timestamp_null.q ql/src/test/queries/clientpositive/timestamp_null.q deleted file mode 100644 index 36f3541..0000000 --- ql/src/test/queries/clientpositive/timestamp_null.q +++ /dev/null @@ -1,7 +0,0 @@ -DROP TABLE IF EXISTS timestamp_null; -CREATE TABLE timestamp_null (t1 TIMESTAMP); -LOAD DATA LOCAL INPATH '../../data/files/test.dat' OVERWRITE INTO TABLE timestamp_null; - -SELECT * FROM timestamp_null LIMIT 1; - -SELECT t1 FROM timestamp_null LIMIT 1; diff --git ql/src/test/results/clientpositive/timestamp_4.q.out ql/src/test/results/clientpositive/timestamp_4.q.out new file mode 100644 index 0000000..15f5809 --- /dev/null +++ ql/src/test/results/clientpositive/timestamp_4.q.out @@ -0,0 +1,276 @@ +PREHOOK: query: DROP TABLE IF EXISTS timestamp_4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS timestamp_4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE timestamp_4 (t TIMESTAMP) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE timestamp_4 (t TIMESTAMP) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@timestamp_4 +PREHOOK: query: ALTER TABLE timestamp_4 SET SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@timestamp_4 +PREHOOK: Output: default@timestamp_4 +POSTHOOK: query: ALTER TABLE timestamp_4 SET SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@timestamp_4 +POSTHOOK: Output: default@timestamp_4 +PREHOOK: query: -- timestamp_data.txt contains timestamps in below supported formats +-- 1. Integer: UNIX epoch seconds +-- 2. Floating point: UNIX epoch seconds plus nanoseconds +-- 3. Strings: JDBC compliant java.sql.Timestamp format +-- "YYYY-MM-DD HH:MM:SS.fffffffff" (9 decimal place precision) +LOAD DATA LOCAL INPATH '../../data/files/timestamp_data.txt' INTO TABLE timestamp_4 +PREHOOK: type: LOAD +PREHOOK: Output: default@timestamp_4 +POSTHOOK: query: -- timestamp_data.txt contains timestamps in below supported formats +-- 1. Integer: UNIX epoch seconds +-- 2. Floating point: UNIX epoch seconds plus nanoseconds +-- 3. Strings: JDBC compliant java.sql.Timestamp format +-- "YYYY-MM-DD HH:MM:SS.fffffffff" (9 decimal place precision) +LOAD DATA LOCAL INPATH '../../data/files/timestamp_data.txt' INTO TABLE timestamp_4 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@timestamp_4 +PREHOOK: query: SELECT t FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +2013-04-22 16:11:25 +2013-04-22 16:12:13.594127116 +2013-04-22 16:11:25 +1969-12-31 16:00:01 +NULL +2013-04-22 16:12:13.594127876 +1969-12-31 16:01:07 +2013-04-22 16:12:13.999999999 +NULL +NULL +2013-04-22 16:12:13.999999999 +NULL +NULL +PREHOOK: query: SELECT CAST(t AS TINYINT) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS TINYINT) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +-99 +-51 +-99 +1 +NULL +-51 +67 +-51 +NULL +NULL +-51 +NULL +NULL +PREHOOK: query: SELECT CAST(t AS SMALLINT) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS SMALLINT) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +-15459 +-15411 +-15459 +1 +NULL +-15411 +67 +-15411 +NULL +NULL +-15411 +NULL +NULL +PREHOOK: query: SELECT CAST(t AS INT) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS INT) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +1366672285 +1366672333 +1366672285 +1 +NULL +1366672333 +67 +1366672333 +NULL +NULL +1366672333 +NULL +NULL +PREHOOK: query: SELECT CAST(t AS BIGINT) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS BIGINT) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +1366672285 +1366672333 +1366672285 +1 +NULL +1366672333 +67 +1366672333 +NULL +NULL +1366672333 +NULL +NULL +PREHOOK: query: SELECT CAST(t AS FLOAT) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS FLOAT) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +1.36667226E9 +1.36667238E9 +1.36667226E9 +1.0 +NULL +1.36667238E9 +67.0 +1.36667238E9 +NULL +NULL +1.36667238E9 +NULL +NULL +PREHOOK: query: SELECT CAST(t AS DOUBLE) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS DOUBLE) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +1.366672285E9 +1.3666723335941272E9 +1.366672285E9 +1.0 +NULL +1.366672333594128E9 +67.0 +1.366672334E9 +NULL +NULL +1.366672334E9 +NULL +NULL +PREHOOK: query: SELECT CAST(t AS STRING) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS STRING) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +2013-04-22 16:11:25 +2013-04-22 16:12:13.594127116 +2013-04-22 16:11:25 +1969-12-31 16:00:01 +NULL +2013-04-22 16:12:13.594127876 +1969-12-31 16:01:07 +2013-04-22 16:12:13.999999999 +NULL +NULL +2013-04-22 16:12:13.999999999 +NULL +NULL +PREHOOK: query: SELECT * FROM timestamp_4 ORDER BY t +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_4 ORDER BY t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +NULL +NULL +NULL +NULL +NULL +1969-12-31 16:00:01 +1969-12-31 16:01:07 +2013-04-22 16:11:25 +2013-04-22 16:11:25 +2013-04-22 16:12:13.594127116 +2013-04-22 16:12:13.594127876 +2013-04-22 16:12:13.999999999 +2013-04-22 16:12:13.999999999 +PREHOOK: query: SELECT * FROM timestamp_4 WHERE t = '2013-04-22 16:11:25' +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_4 WHERE t = '2013-04-22 16:11:25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +2013-04-22 16:11:25 +2013-04-22 16:11:25 +PREHOOK: query: SELECT * FROM timestamp_4 WHERE t > '2013-04-22 16:11:25' +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_4 WHERE t > '2013-04-22 16:11:25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +2013-04-22 16:12:13.594127116 +2013-04-22 16:12:13.594127876 +2013-04-22 16:12:13.999999999 +2013-04-22 16:12:13.999999999 +PREHOOK: query: SELECT * FROM timestamp_4 WHERE t < '2013-04-22 16:11:25' +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_4 WHERE t < '2013-04-22 16:11:25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +1969-12-31 16:00:01 +1969-12-31 16:01:07 +PREHOOK: query: SELECT * FROM timestamp_4 WHERE t <> '2013-04-22 16:11:25' +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_4 WHERE t <> '2013-04-22 16:11:25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +2013-04-22 16:12:13.594127116 +1969-12-31 16:00:01 +2013-04-22 16:12:13.594127876 +1969-12-31 16:01:07 +2013-04-22 16:12:13.999999999 +2013-04-22 16:12:13.999999999 +PREHOOK: query: DROP TABLE timestamp_4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@timestamp_4 +PREHOOK: Output: default@timestamp_4 +POSTHOOK: query: DROP TABLE timestamp_4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@timestamp_4 +POSTHOOK: Output: default@timestamp_4 diff --git ql/src/test/results/clientpositive/timestamp_null.q.out ql/src/test/results/clientpositive/timestamp_null.q.out deleted file mode 100644 index 57269d7..0000000 --- ql/src/test/results/clientpositive/timestamp_null.q.out +++ /dev/null @@ -1,33 +0,0 @@ -PREHOOK: query: DROP TABLE IF EXISTS timestamp_null -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS timestamp_null -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE timestamp_null (t1 TIMESTAMP) -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE timestamp_null (t1 TIMESTAMP) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@timestamp_null -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/test.dat' OVERWRITE INTO TABLE timestamp_null -PREHOOK: type: LOAD -PREHOOK: Output: default@timestamp_null -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/test.dat' OVERWRITE INTO TABLE timestamp_null -POSTHOOK: type: LOAD -POSTHOOK: Output: default@timestamp_null -PREHOOK: query: SELECT * FROM timestamp_null LIMIT 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamp_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM timestamp_null LIMIT 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamp_null -#### A masked pattern was here #### -NULL -PREHOOK: query: SELECT t1 FROM timestamp_null LIMIT 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamp_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT t1 FROM timestamp_null LIMIT 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamp_null -#### A masked pattern was here #### -NULL diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java index 435d6c6..fce637c 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java @@ -519,7 +519,11 @@ public static Timestamp floatToTimestamp(float f) { } public static Timestamp decimalToTimestamp(HiveDecimal d) { - BigDecimal nanoInstant = d.bigDecimalValue().multiply(BILLION_BIG_DECIMAL); + return bigDecimalToTimestamp(d.bigDecimalValue()); + } + + public static Timestamp bigDecimalToTimestamp(BigDecimal bd) { + BigDecimal nanoInstant = bd.multiply(BILLION_BIG_DECIMAL); int nanos = nanoInstant.remainder(BILLION_BIG_DECIMAL).intValue(); if (nanos < 0) { nanos += 1000000000; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java index 27895c5..e77440e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java @@ -20,7 +20,10 @@ import java.io.IOException; import java.io.OutputStream; import java.io.UnsupportedEncodingException; +import java.math.BigDecimal; import java.sql.Timestamp; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -37,6 +40,7 @@ */ public class LazyTimestamp extends LazyPrimitive { static final private Log LOG = LogFactory.getLog(LazyTimestamp.class); + static Pattern jdbcTSPattern = Pattern.compile("(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})"); public LazyTimestamp(LazyTimestampObjectInspector oi) { super(oi); @@ -67,14 +71,23 @@ public void init(ByteArrayRef bytes, int start, int length) { } Timestamp t = null; - if (s.compareTo("NULL") == 0) { + isNull = false; + if (s.compareToIgnoreCase("NULL") == 0) { isNull = true; - logExceptionMessage(bytes, start, length, "TIMESTAMP"); } else { + // Supported timestamp formats: + // Integer: Interpreted as UNIX timestamp in seconds + // Floating point: Interpreted as UNIX timestamp in seconds with decimal precision + // Strings: JDBC compliant java.sql.Timestamp format "YYYY-MM-DD HH:MM:SS.fffffffff" + // (9 decimal place precision) + try { - t = Timestamp.valueOf(s); - isNull = false; - } catch (IllegalArgumentException e) { + Matcher m = jdbcTSPattern.matcher(s); + if(m.find()) + t = Timestamp.valueOf(s); + else + t = TimestampWritable.bigDecimalToTimestamp(new BigDecimal(s.trim())); + } catch(Exception e) { isNull = true; logExceptionMessage(bytes, start, length, "TIMESTAMP"); }