diff --git data/files/timestamp_data.txt data/files/timestamp_data.txt new file mode 100644 index 0000000..edc22ea --- /dev/null +++ data/files/timestamp_data.txt @@ -0,0 +1,13 @@ +1366672285 +1366672333.594127116 +2013-04-22 16:11:25.0 +1 +NULL +2013-04-22 16:12:13.594127876 +67 +1366672333.999999999 +null +nULL +1366672333.99999999999 +randomtext +234randomtext diff --git ql/src/test/queries/clientpositive/timestamp_4.q ql/src/test/queries/clientpositive/timestamp_4.q new file mode 100644 index 0000000..bc0c702 --- /dev/null +++ ql/src/test/queries/clientpositive/timestamp_4.q @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS timestamp_4; + +CREATE TABLE timestamp_4 (t TIMESTAMP); +ALTER TABLE timestamp_4 SET SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; + +-- timestamp_data.txt contains timestamps in below supported formats +-- 1. Integer: UNIX epoch seconds +-- 2. Floating point: UNIX epoch seconds plus nanoseconds +-- 3. Strings: JDBC compliant java.sql.Timestamp format +-- "YYYY-MM-DD HH:MM:SS.fffffffff" (9 decimal place precision) +LOAD DATA LOCAL INPATH '../data/files/timestamp_data.txt' INTO TABLE timestamp_4; + +SELECT t FROM timestamp_4; +SELECT CAST(t AS TINYINT) FROM timestamp_4; +SELECT CAST(t AS SMALLINT) FROM timestamp_4; +SELECT CAST(t AS INT) FROM timestamp_4; +SELECT CAST(t AS BIGINT) FROM timestamp_4; +SELECT CAST(t AS FLOAT) FROM timestamp_4; +SELECT CAST(t AS DOUBLE) FROM timestamp_4; +SELECT CAST(t AS STRING) FROM timestamp_4; + +SELECT * FROM timestamp_4 ORDER BY t; + +SELECT * FROM timestamp_4 WHERE t = '2013-04-22 16:11:25'; + +SELECT * FROM timestamp_4 WHERE t > '2013-04-22 16:11:25'; + +SELECT * FROM timestamp_4 WHERE t < '2013-04-22 16:11:25'; + +SELECT * FROM timestamp_4 WHERE t <> '2013-04-22 16:11:25'; + +DROP TABLE timestamp_4; diff --git ql/src/test/queries/clientpositive/timestamp_null.q ql/src/test/queries/clientpositive/timestamp_null.q deleted file mode 100644 index efd5bc4..0000000 --- ql/src/test/queries/clientpositive/timestamp_null.q +++ /dev/null @@ -1,7 +0,0 @@ -DROP TABLE IF EXISTS timestamp_null; -CREATE TABLE timestamp_null (t1 TIMESTAMP); -LOAD DATA LOCAL INPATH '../data/files/test.dat' OVERWRITE INTO TABLE timestamp_null; - -SELECT * FROM timestamp_null LIMIT 1; - -SELECT t1 FROM timestamp_null LIMIT 1; diff --git ql/src/test/results/clientpositive/timestamp_4.q.out ql/src/test/results/clientpositive/timestamp_4.q.out new file mode 100644 index 0000000..efba859 --- /dev/null +++ ql/src/test/results/clientpositive/timestamp_4.q.out @@ -0,0 +1,274 @@ +PREHOOK: query: DROP TABLE IF EXISTS timestamp_4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS timestamp_4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE timestamp_4 (t TIMESTAMP) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE timestamp_4 (t TIMESTAMP) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@timestamp_4 +PREHOOK: query: ALTER TABLE timestamp_4 SET SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@timestamp_4 +PREHOOK: Output: default@timestamp_4 +POSTHOOK: query: ALTER TABLE timestamp_4 SET SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@timestamp_4 +POSTHOOK: Output: default@timestamp_4 +PREHOOK: query: -- timestamp_data.txt contains timestamps in below supported formats +-- 1. Integer: UNIX epoch seconds +-- 2. Floating point: UNIX epoch seconds plus nanoseconds +-- 3. Strings: JDBC compliant java.sql.Timestamp format +-- "YYYY-MM-DD HH:MM:SS.fffffffff" (9 decimal place precision) +LOAD DATA LOCAL INPATH '../data/files/timestamp_data.txt' INTO TABLE timestamp_4 +PREHOOK: type: LOAD +PREHOOK: Output: default@timestamp_4 +POSTHOOK: query: -- timestamp_data.txt contains timestamps in below supported formats +-- 1. Integer: UNIX epoch seconds +-- 2. Floating point: UNIX epoch seconds plus nanoseconds +-- 3. Strings: JDBC compliant java.sql.Timestamp format +-- "YYYY-MM-DD HH:MM:SS.fffffffff" (9 decimal place precision) +LOAD DATA LOCAL INPATH '../data/files/timestamp_data.txt' INTO TABLE timestamp_4 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@timestamp_4 +PREHOOK: query: SELECT t FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +2013-04-22 16:11:25 +2013-04-22 16:12:13.594127116 +2013-04-22 16:11:25 +1969-12-31 16:00:01 +NULL +2013-04-22 16:12:13.594127876 +1969-12-31 16:01:07 +2013-04-22 16:12:13.999999999 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT CAST(t AS TINYINT) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS TINYINT) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +-99 +-51 +-99 +1 +NULL +-51 +67 +-51 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT CAST(t AS SMALLINT) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS SMALLINT) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +-15459 +-15411 +-15459 +1 +NULL +-15411 +67 +-15411 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT CAST(t AS INT) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS INT) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +1366672285 +1366672333 +1366672285 +1 +NULL +1366672333 +67 +1366672333 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT CAST(t AS BIGINT) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS BIGINT) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +1366672285 +1366672333 +1366672285 +1 +NULL +1366672333 +67 +1366672333 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT CAST(t AS FLOAT) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS FLOAT) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +1.36667226E9 +1.36667238E9 +1.36667226E9 +1.0 +NULL +1.36667238E9 +67.0 +1.36667238E9 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT CAST(t AS DOUBLE) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS DOUBLE) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +1.366672285E9 +1.3666723335941272E9 +1.366672285E9 +1.0 +NULL +1.366672333594128E9 +67.0 +1.366672334E9 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT CAST(t AS STRING) FROM timestamp_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(t AS STRING) FROM timestamp_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +2013-04-22 16:11:25 +2013-04-22 16:12:13.594127116 +2013-04-22 16:11:25 +1969-12-31 16:00:01 +NULL +2013-04-22 16:12:13.594127876 +1969-12-31 16:01:07 +2013-04-22 16:12:13.999999999 +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: SELECT * FROM timestamp_4 ORDER BY t +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_4 ORDER BY t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +NULL +NULL +NULL +NULL +NULL +NULL +1969-12-31 16:00:01 +1969-12-31 16:01:07 +2013-04-22 16:11:25 +2013-04-22 16:11:25 +2013-04-22 16:12:13.594127116 +2013-04-22 16:12:13.594127876 +2013-04-22 16:12:13.999999999 +PREHOOK: query: SELECT * FROM timestamp_4 WHERE t = '2013-04-22 16:11:25' +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_4 WHERE t = '2013-04-22 16:11:25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +2013-04-22 16:11:25 +2013-04-22 16:11:25 +PREHOOK: query: SELECT * FROM timestamp_4 WHERE t > '2013-04-22 16:11:25' +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_4 WHERE t > '2013-04-22 16:11:25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +2013-04-22 16:12:13.594127116 +2013-04-22 16:12:13.594127876 +2013-04-22 16:12:13.999999999 +PREHOOK: query: SELECT * FROM timestamp_4 WHERE t < '2013-04-22 16:11:25' +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_4 WHERE t < '2013-04-22 16:11:25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +1969-12-31 16:00:01 +1969-12-31 16:01:07 +PREHOOK: query: SELECT * FROM timestamp_4 WHERE t <> '2013-04-22 16:11:25' +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_4 WHERE t <> '2013-04-22 16:11:25' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_4 +#### A masked pattern was here #### +2013-04-22 16:12:13.594127116 +1969-12-31 16:00:01 +2013-04-22 16:12:13.594127876 +1969-12-31 16:01:07 +2013-04-22 16:12:13.999999999 +PREHOOK: query: DROP TABLE timestamp_4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@timestamp_4 +PREHOOK: Output: default@timestamp_4 +POSTHOOK: query: DROP TABLE timestamp_4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@timestamp_4 +POSTHOOK: Output: default@timestamp_4 diff --git ql/src/test/results/clientpositive/timestamp_null.q.out ql/src/test/results/clientpositive/timestamp_null.q.out deleted file mode 100644 index d21b880..0000000 --- ql/src/test/results/clientpositive/timestamp_null.q.out +++ /dev/null @@ -1,33 +0,0 @@ -PREHOOK: query: DROP TABLE IF EXISTS timestamp_null -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS timestamp_null -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE timestamp_null (t1 TIMESTAMP) -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE timestamp_null (t1 TIMESTAMP) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@timestamp_null -PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/test.dat' OVERWRITE INTO TABLE timestamp_null -PREHOOK: type: LOAD -PREHOOK: Output: default@timestamp_null -POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/test.dat' OVERWRITE INTO TABLE timestamp_null -POSTHOOK: type: LOAD -POSTHOOK: Output: default@timestamp_null -PREHOOK: query: SELECT * FROM timestamp_null LIMIT 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamp_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM timestamp_null LIMIT 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamp_null -#### A masked pattern was here #### -NULL -PREHOOK: query: SELECT t1 FROM timestamp_null LIMIT 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@timestamp_null -#### A masked pattern was here #### -POSTHOOK: query: SELECT t1 FROM timestamp_null LIMIT 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@timestamp_null -#### A masked pattern was here #### -NULL diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java index 27895c5..ef6e35b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.OutputStream; import java.io.UnsupportedEncodingException; +import java.math.BigDecimal; import java.sql.Timestamp; import org.apache.commons.logging.Log; @@ -37,6 +38,7 @@ */ public class LazyTimestamp extends LazyPrimitive { static final private Log LOG = LogFactory.getLog(LazyTimestamp.class); + static final private BigDecimal NANOSECONDS_PERSEC_BD = new BigDecimal(1000000000); public LazyTimestamp(LazyTimestampObjectInspector oi) { super(oi); @@ -67,16 +69,44 @@ public void init(ByteArrayRef bytes, int start, int length) { } Timestamp t = null; - if (s.compareTo("NULL") == 0) { + isNull = false; + if (s.compareToIgnoreCase("NULL") == 0) { isNull = true; logExceptionMessage(bytes, start, length, "TIMESTAMP"); } else { + // Supported timestamp formats: + // Integer: Interpreted as UNIX timestamp in seconds + // Floating point: Interpreted as UNIX timestamp in seconds with decimal precision + // Strings: JDBC compliant java.sql.Timestamp format "YYYY-MM-DD HH:MM:SS.fffffffff" + // (9 decimal place precision) + + // Assume the given format is JDBC compliant and try to convert + // if it fails fall back to conversion from other two formats try { t = Timestamp.valueOf(s); - isNull = false; } catch (IllegalArgumentException e) { - isNull = true; - logExceptionMessage(bytes, start, length, "TIMESTAMP"); + try { + BigDecimal value = new BigDecimal(s.trim()); + long valueAbs = value.longValue(); + t = new Timestamp(valueAbs*1000); + + if (value.scale()>0) { + // convert the decimal part into nanoseconds and set it in Timestamp + if (value.scale()>9) { + // invalid nanosecond part precision. + // java.sql.Timestamp.setNanos() can't take beyond 9 decimal precision + isNull = true; + logExceptionMessage(bytes, start, length, "TIMESTAMP"); + } else { + value = value.subtract(new BigDecimal(valueAbs)); + value = value.multiply(NANOSECONDS_PERSEC_BD); + t.setNanos(value.intValue()); + } + } + } catch(Exception ex) { + isNull = true; + logExceptionMessage(bytes, start, length, "TIMESTAMP"); + } } } data.set(t);