diff --git a/common/src/test/org/apache/hadoop/hive/common/type/TestHiveTimestamp.java b/common/src/test/org/apache/hadoop/hive/common/type/TestHiveTimestamp.java new file mode 100644 index 0000000..5993b2d --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/type/TestHiveTimestamp.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.type; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.TimeZone; +import java.util.concurrent.ThreadLocalRandom; + +public class TestHiveTimestamp { + + private static TimeZone defaultTZ; + private static final String[] IDs = TimeZone.getAvailableIDs(); + + @BeforeClass + public static void storeDefaultTZ() { + defaultTZ = TimeZone.getDefault(); + } + + @Before + public void setTZ() { + int index = ThreadLocalRandom.current().nextInt(IDs.length); + TimeZone.setDefault(TimeZone.getTimeZone(IDs[index])); + } + + @AfterClass + public static void restoreTZ() { + TimeZone.setDefault(defaultTZ); + } + + @Test + public void testParse() { + // No timezone specified + String s1 = "2016-01-03 12:26:34.0123"; + Assert.assertEquals(s1, HiveTimestamp.valueOf(s1).toString()); + // With timezone + String s2 = s1 + " UTC"; + Assert.assertEquals(s1 + " GMT", HiveTimestamp.valueOf(s2).toString()); + Assert.assertEquals(s1 + " GMT+08:00", HiveTimestamp.valueOf(s1, "Asia/Shanghai").toString()); + } + + @Test + public void testHandleDST() { + // Same timezone can have different offset due to DST + String s1 = "2005-01-03 02:01:00"; + Assert.assertEquals(s1 + ".0 GMT", HiveTimestamp.valueOf(s1, "Europe/London").toString()); + String s2 = "2005-06-03 02:01:00.30547"; + Assert.assertEquals(s2 + " GMT+01:00", HiveTimestamp.valueOf(s2, "Europe/London").toString()); + // Can print time with DST properly + String s3 = "2005-04-03 02:01:00.04067"; + Assert.assertEquals("2005-04-03 03:01:00.04067 GMT-07:00", + HiveTimestamp.valueOf(s3, "America/Los_Angeles").toString()); + } + + @Test + public void testBadZoneID() { + try { + new HiveTimestamp(0, "Foo id"); + Assert.fail("Invalid timezone ID should cause exception"); + } catch (IllegalArgumentException e) { + // expected + } + } +} diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java index b90e576..f57bdd6 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java @@ -20,13 +20,12 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.io.OutputStream; import java.sql.Timestamp; -import java.text.DateFormat; -import java.text.SimpleDateFormat; import java.util.Date; +import com.google.common.base.Preconditions; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveTimestamp; import org.apache.hadoop.hive.ql.util.TimestampUtils; import org.apache.hadoop.hive.serde2.ByteStream.RandomAccessOutput; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; @@ -61,21 +60,17 @@ private static final long SEVEN_BYTE_LONG_SIGN_FLIP = 0xff80L << 48; + private static final int FOUR_BYTE_INT_SIGN_FLIP = 1 << 31; - /** The maximum number of bytes required for a TimestampWritable */ - public static final int MAX_BYTES = 13; + private static final int TIMEZONE_MASK = 1 << 30; - public static final int BINARY_SORTABLE_LENGTH = 11; - private static final ThreadLocal threadLocalDateFormat = - new ThreadLocal() { - @Override - protected DateFormat initialValue() { - return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - } - }; + /** The maximum number of bytes required for a TimestampWritable */ + public static final int MAX_BYTES = 16; + + public static final int BINARY_SORTABLE_LENGTH = 15; - private Timestamp timestamp = new Timestamp(0); + private Timestamp timestamp = new HiveTimestamp(0); /** * true if data is stored in timestamp field rather than byte arrays. @@ -131,6 +126,9 @@ public void set(Timestamp t) { if (t == null) { timestamp.setTime(0); timestamp.setNanos(0); + if (timestamp instanceof HiveTimestamp) { + ((HiveTimestamp) timestamp).setOffsetInMin(null); + } return; } this.timestamp = t; @@ -150,7 +148,7 @@ public void set(TimestampWritable t) { } } - public static void updateTimestamp(Timestamp timestamp, long secondsAsMillis, int nanos) { + private static void updateTimestamp(Timestamp timestamp, long secondsAsMillis, int nanos) { ((Date) timestamp).setTime(secondsAsMillis); timestamp.setNanos(nanos); } @@ -203,6 +201,36 @@ public int getNanos() { } } + private Integer getTimezoneOffset() { + if (!timestampEmpty) { + return timestamp instanceof HiveTimestamp ? + ((HiveTimestamp) timestamp).getOffsetInMin() : null; + } else if (!bytesEmpty) { + return hasDecimalOrSecondVInt() ? getTimezoneOffset(currentBytes, offset + 4) : null; + } else { + throw new IllegalStateException("Both timestamp and bytes are empty"); + } + } + + // offset should point to the start of decimal field + private static Integer getTimezoneOffset(byte[] bytes, final int offset) { + if (hasTimezoneOffset(bytes, offset)) { + int pos = offset + WritableUtils.decodeVIntSize(bytes[offset]); + // skip the 2nd VInt + if (hasSecondVInt(bytes[offset])) { + pos += WritableUtils.decodeVIntSize(bytes[pos]); + } + return readVInt(bytes, pos); + } + return null; + } + + private static boolean hasTimezoneOffset(byte[] bytes, int offset) { + int val = readVInt(bytes, offset); + return (val >= 0 && (val & TIMEZONE_MASK) != 0) || + (val < 0 && (val & TIMEZONE_MASK) == 0); + } + /** * @return length of serialized TimestampWritable data. As a side effect, populates the internal * byte array if empty. @@ -213,15 +241,19 @@ int getTotalLength() { } public static int getTotalLength(byte[] bytes, int offset) { - int len = 4; + int pos = offset + 4; if (hasDecimalOrSecondVInt(bytes[offset])) { - int firstVIntLen = WritableUtils.decodeVIntSize(bytes[offset + 4]); - len += firstVIntLen; - if (hasSecondVInt(bytes[offset + 4])) { - len += WritableUtils.decodeVIntSize(bytes[offset + 4 + firstVIntLen]); + boolean hasSecondVInt = hasSecondVInt(bytes[pos]); + boolean hasTimezoneOffset = hasTimezoneOffset(bytes, pos); + pos += WritableUtils.decodeVIntSize(bytes[pos]); + if (hasSecondVInt) { + pos += WritableUtils.decodeVIntSize(bytes[pos]); + } + if (hasTimezoneOffset) { + pos += WritableUtils.decodeVIntSize(bytes[pos]); } } - return len; + return pos - offset; } public Timestamp getTimestamp() { @@ -247,7 +279,7 @@ public Timestamp getTimestamp() { /** * @return byte[] representation of TimestampWritable that is binary - * sortable (7 bytes for seconds, 4 bytes for nanoseconds) + * sortable (7 bytes for seconds, 4 bytes for nanoseconds, 4 bytes for timezone offset) */ public byte[] getBinarySortable() { byte[] b = new byte[BINARY_SORTABLE_LENGTH]; @@ -257,6 +289,11 @@ public Timestamp getTimestamp() { long seconds = getSeconds() ^ SEVEN_BYTE_LONG_SIGN_FLIP; sevenByteLongToBytes(seconds, b, 0); intToBytes(nanos, b, 7); + Integer tzOffset = getTimezoneOffset(); + if (tzOffset == null) { + tzOffset = HiveTimestamp.NULL_OFFSET; + } + intToBytes(tzOffset ^ FOUR_BYTE_INT_SIGN_FLIP, b, 11); return b; } @@ -270,20 +307,27 @@ public void setBinarySortable(byte[] bytes, int binSortOffset) { // Flip the sign bit (and unused bits of the high-order byte) of the seven-byte long back. long seconds = readSevenByteLong(bytes, binSortOffset) ^ SEVEN_BYTE_LONG_SIGN_FLIP; int nanos = bytesToInt(bytes, binSortOffset + 7); + int tzOffset = bytesToInt(bytes, binSortOffset + 11) ^ FOUR_BYTE_INT_SIGN_FLIP; + boolean hasTimezone = HiveTimestamp.isValidOffset(tzOffset); int firstInt = (int) seconds; boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE; - if (nanos != 0 || hasSecondVInt) { + if (nanos != 0 || hasSecondVInt || hasTimezone) { firstInt |= DECIMAL_OR_SECOND_VINT_FLAG; } else { firstInt &= LOWEST_31_BITS_OF_SEC_MASK; } intToBytes(firstInt, internalBytes, 0); - setNanosBytes(nanos, internalBytes, 4, hasSecondVInt); + setNanosBytes(nanos, internalBytes, 4, hasSecondVInt, hasTimezone); + int pos = 4; if (hasSecondVInt) { - LazyBinaryUtils.writeVLongToByteArray(internalBytes, - 4 + WritableUtils.decodeVIntSize(internalBytes[4]), - seconds >> 31); + pos += WritableUtils.decodeVIntSize(internalBytes[pos]); + LazyBinaryUtils.writeVLongToByteArray(internalBytes, pos, seconds >> 31); + } + + if (hasTimezone) { + pos += WritableUtils.decodeVIntSize(internalBytes[pos]); + LazyBinaryUtils.writeVLongToByteArray(internalBytes, pos, tzOffset); } currentBytes = internalBytes; @@ -298,7 +342,7 @@ public void setBinarySortable(byte[] bytes, int binSortOffset) { private void checkBytes() { if (bytesEmpty) { // Populate byte[] from Timestamp - convertTimestampToBytes(timestamp, internalBytes, 0); + populateBytes(); offset = 0; currentBytes = internalBytes; bytesEmpty = false; @@ -331,25 +375,26 @@ public void readFields(DataInput in) throws IOException { in.readFully(internalBytes, 4, 1); int len = (byte) WritableUtils.decodeVIntSize(internalBytes[4]); if (len > 1) { - in.readFully(internalBytes, 5, len-1); + in.readFully(internalBytes, 5, len - 1); } - long vlong = LazyBinaryUtils.readVLongFromByteArray(internalBytes, 4); - if (vlong < -1000000000 || vlong > 999999999) { - throw new IOException( - "Invalid first vint value (encoded nanoseconds) of a TimestampWritable: " + vlong + - ", expected to be between -1000000000 and 999999999."); - // Note that -1000000000 is a valid value corresponding to a nanosecond timestamp - // of 999999999, because if the second VInt is present, we use the value - // (-reversedNanoseconds - 1) as the second VInt. - } - if (vlong < 0) { + int pos = 4 + len; + if (hasSecondVInt(internalBytes[4])) { // This indicates there is a second VInt containing the additional bits of the seconds // field. - in.readFully(internalBytes, 4 + len, 1); - int secondVIntLen = (byte) WritableUtils.decodeVIntSize(internalBytes[4 + len]); + in.readFully(internalBytes, pos, 1); + int secondVIntLen = (byte) WritableUtils.decodeVIntSize(internalBytes[pos]); if (secondVIntLen > 1) { - in.readFully(internalBytes, 5 + len, secondVIntLen - 1); + in.readFully(internalBytes, pos + 1, secondVIntLen - 1); + } + pos += secondVIntLen; + } + + if (hasTimezoneOffset(internalBytes, 4)) { + in.readFully(internalBytes, pos, 1); + int tzOffsetLen = WritableUtils.decodeVIntSize(internalBytes[pos]); + if (tzOffsetLen > 1) { + in.readFully(internalBytes, pos + 1, tzOffsetLen - 1); } } } @@ -362,6 +407,7 @@ public void write(DataOutput out) throws IOException { out.write(currentBytes, offset, getTotalLength()); } + @Override public int compareTo(TimestampWritable t) { checkBytes(); long s1 = this.getSeconds(); @@ -370,7 +416,18 @@ public int compareTo(TimestampWritable t) { int n1 = this.getNanos(); int n2 = t.getNanos(); if (n1 == n2) { - return 0; + Integer tz1 = getTimezoneOffset(); + Integer tz2 = t.getTimezoneOffset(); + if (tz1 == null || tz2 == null) { + if (tz1 != null) { + return 1; + } + if (tz2 != null) { + return -1; + } + return 0; + } + return tz1 - tz2; } return n1 - n2; } else { @@ -391,15 +448,14 @@ public String toString() { String timestampString = timestamp.toString(); if (timestampString.length() > 19) { - if (timestampString.length() == 21) { - if (timestampString.substring(19).compareTo(".0") == 0) { - return threadLocalDateFormat.get().format(timestamp); + if (timestampString.substring(19, 21).compareTo(".0") == 0) { + if (timestampString.length() == 21 || !Character.isDigit(timestampString.charAt(21))) { + timestampString = timestampString.substring(0, 19) + timestampString.substring(21); } } - return threadLocalDateFormat.get().format(timestamp) + timestampString.substring(19); } - return threadLocalDateFormat.get().format(timestamp); + return timestampString; } @Override @@ -407,7 +463,12 @@ public int hashCode() { long seconds = getSeconds(); seconds <<= 30; // the nanosecond part fits in 30 bits seconds |= getNanos(); - return (int) ((seconds >>> 32) ^ seconds); + Integer tzOffset = getTimezoneOffset(); + int hash = (int) ((seconds >>> 32) ^ seconds); + if (tzOffset != null) { + hash ^= tzOffset; + } + return hash; } private void populateTimestamp() { @@ -415,6 +476,13 @@ private void populateTimestamp() { int nanos = getNanos(); timestamp.setTime(seconds * 1000); timestamp.setNanos(nanos); + Integer tzOffset = getTimezoneOffset(); + if (timestamp instanceof HiveTimestamp) { + ((HiveTimestamp) timestamp).setOffsetInMin(tzOffset); + } else if (tzOffset != null) { + timestamp = new HiveTimestamp(timestamp); + ((HiveTimestamp) timestamp).setOffsetInMin(tzOffset); + } } /** Static methods **/ @@ -441,13 +509,14 @@ public static long getSeconds(byte[] bytes, int offset) { } public static int getNanos(byte[] bytes, int offset) { - VInt vInt = LazyBinaryUtils.threadLocalVInt.get(); - LazyBinaryUtils.readVInt(bytes, offset, vInt); - int val = vInt.value; + int val = readVInt(bytes, offset); if (val < 0) { + val |= TIMEZONE_MASK; // This means there is a second VInt present that specifies additional bits of the timestamp. // The reversed nanoseconds value is still encoded in this VInt. val = -val - 1; + } else { + val &= ~TIMEZONE_MASK; } int len = (int) Math.floor(Math.log10(val)) + 1; @@ -466,33 +535,43 @@ public static int getNanos(byte[] bytes, int offset) { return val; } + private static int readVInt(byte[] bytes, int offset) { + VInt vInt = LazyBinaryUtils.threadLocalVInt.get(); + LazyBinaryUtils.readVInt(bytes, offset, vInt); + return vInt.value; + } + /** - * Writes a Timestamp's serialized value to byte array b at the given offset - * @param t to convert to bytes - * @param b destination byte array - * @param offset destination offset in the byte array + * Writes the Timestamp's serialized value to the internal byte array. */ - public static void convertTimestampToBytes(Timestamp t, byte[] b, - int offset) { - long millis = t.getTime(); - int nanos = t.getNanos(); + private void populateBytes() { + long millis = timestamp.getTime(); + int nanos = timestamp.getNanos(); + boolean hasTimezone = timestamp instanceof HiveTimestamp && + ((HiveTimestamp) timestamp).hasTimezone(); long seconds = TimestampUtils.millisToSeconds(millis); boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE; - boolean hasDecimal = setNanosBytes(nanos, b, offset+4, hasSecondVInt); + int position = 4; + boolean hasDecimal = setNanosBytes(nanos, internalBytes, position, hasSecondVInt, hasTimezone); int firstInt = (int) seconds; - if (hasDecimal || hasSecondVInt) { + if (hasDecimal || hasSecondVInt || hasTimezone) { firstInt |= DECIMAL_OR_SECOND_VINT_FLAG; } else { firstInt &= LOWEST_31_BITS_OF_SEC_MASK; } - intToBytes(firstInt, b, offset); + intToBytes(firstInt, internalBytes, 0); if (hasSecondVInt) { - LazyBinaryUtils.writeVLongToByteArray(b, - offset + 4 + WritableUtils.decodeVIntSize(b[offset + 4]), - seconds >> 31); + position += WritableUtils.decodeVIntSize(internalBytes[position]); + LazyBinaryUtils.writeVLongToByteArray(internalBytes, position, seconds >> 31); + } + + if (hasTimezone) { + position += WritableUtils.decodeVIntSize(internalBytes[position]); + LazyBinaryUtils.writeVLongToByteArray(internalBytes, position, + ((HiveTimestamp) timestamp).getOffsetInMin()); } } @@ -505,7 +584,8 @@ public static void convertTimestampToBytes(Timestamp t, byte[] b, * @param offset * @return */ - private static boolean setNanosBytes(int nanos, byte[] b, int offset, boolean hasSecondVInt) { + private static boolean setNanosBytes(int nanos, byte[] b, int offset, + boolean hasSecondVInt, boolean hasTimezone) { int decimal = 0; if (nanos != 0) { int counter = 0; @@ -517,10 +597,24 @@ private static boolean setNanosBytes(int nanos, byte[] b, int offset, boolean ha } } - if (hasSecondVInt || decimal != 0) { + if (hasSecondVInt || decimal != 0 || hasTimezone) { // We use the sign of the reversed-nanoseconds field to indicate that there is a second VInt // present. - LazyBinaryUtils.writeVLongToByteArray(b, offset, hasSecondVInt ? (-decimal - 1) : decimal); + int toWrite = decimal; + if (hasSecondVInt) { + toWrite = -toWrite - 1; + } + // Decimal ranges in [-1000000000, 999999999]. Use the second MSB to indicate if + // timezone is present. + // if toWrite >= 0, second MSB is always 0, otherwise it's always 1 + if (hasTimezone) { + if (toWrite >= 0) { + toWrite |= TIMEZONE_MASK; + } else { + toWrite &= ~TIMEZONE_MASK; + } + } + LazyBinaryUtils.writeVLongToByteArray(b, offset, toWrite); } return decimal != 0; } @@ -552,24 +646,20 @@ public static Timestamp longToTimestamp(long time, boolean intToTimestampInSecon } public static void setTimestamp(Timestamp t, byte[] bytes, int offset) { - boolean hasDecimalOrSecondVInt = hasDecimalOrSecondVInt(bytes[offset]); - long seconds = (long) TimestampWritable.getSeconds(bytes, offset); - int nanos = 0; - if (hasDecimalOrSecondVInt) { - nanos = TimestampWritable.getNanos(bytes, offset + 4); - if (hasSecondVInt(bytes[offset + 4])) { - seconds += LazyBinaryUtils.readVLongFromByteArray(bytes, - offset + 4 + WritableUtils.decodeVIntSize(bytes[offset + 4])); - } - } + long seconds = getSeconds(bytes, offset); + int nanos = getNanos(bytes, offset + 4); t.setTime(seconds * 1000); - if (nanos != 0) { - t.setNanos(nanos); + t.setNanos(nanos); + Integer tzOffset = getTimezoneOffset(bytes, offset + 4); + if (t instanceof HiveTimestamp) { + ((HiveTimestamp) t).setOffsetInMin(tzOffset); + } else { + Preconditions.checkArgument(tzOffset == null); } } public static Timestamp createTimestamp(byte[] bytes, int offset) { - Timestamp t = new Timestamp(0); + Timestamp t = new HiveTimestamp(0); TimestampWritable.setTimestamp(t, bytes, offset); return t; } @@ -586,12 +676,6 @@ private final boolean hasDecimalOrSecondVInt() { return hasDecimalOrSecondVInt(currentBytes[offset]); } - public final boolean hasDecimal() { - return hasDecimalOrSecondVInt() || currentBytes[offset + 4] != -1; - // If the first byte of the VInt is -1, the VInt itself is -1, indicating that there is a - // second VInt but the nanoseconds field is actually 0. - } - /** * Writes value into dest at offset * @param value diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java b/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java index 7619efa..e740b91 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java @@ -493,4 +493,30 @@ public void testBinarySortable() { } } + @Test + public void test2ndMSBOfDecimal() { + // The current decimal part ranges in [-1000000000, 999999999]. We should be able to use its + // second MSB to indicate if a timezone offset exists + int decimal = -1000000000; + final int mask = 1 << 30; + while (decimal < 0) { + assertTrue((decimal & mask) != 0); + decimal++; + } + while (decimal <= 999999999) { + assertTrue((decimal & mask) == 0); + decimal++; + } + } + + @Test + public void testSetTimestamp() { + Timestamp t1 = new Timestamp((1L << 32)); + TimestampWritable writable = new TimestampWritable(t1); + byte[] bytes = writable.getBytes(); + Timestamp t2 = new Timestamp(0); + TimestampWritable.setTimestamp(t2, bytes, 0); + assertEquals(t1, t2); + } + } diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveTimestamp.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveTimestamp.java new file mode 100644 index 0000000..5341c53 --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveTimestamp.java @@ -0,0 +1,236 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.type; + +import org.apache.commons.math3.util.Pair; + +import java.sql.Timestamp; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.NoSuchElementException; +import java.util.TimeZone; + +/** + * A thin wrapper of java.sql.Timestamp, with timezoneID offset. + * Any timestamp that requires a specific timezone should use this type. + */ +public class HiveTimestamp extends Timestamp { + private static final ThreadLocal threadLocalDateFormat = + new ThreadLocal() { + @Override + protected DateFormat initialValue() { + return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + } + }; + + // We store the offset from UTC in minutes . Ranges from [-12:00, 14:00]. + private Integer offsetInMin = null; + + private transient String internalID = null; + + private static final int MAX_OFFSET = 840; + private static final int MIN_OFFSET = -720; + + // Used to indicate no offset is present + public static final int NULL_OFFSET = -800; + + public HiveTimestamp(long time, String timezoneID) { + super(time); + computeOffset(timezoneID); + } + + public HiveTimestamp(long time) { + this(time, null); + } + + public HiveTimestamp(Timestamp other) { + this(other.getTime()); + setNanos(other.getNanos()); + if (other instanceof HiveTimestamp) { + this.offsetInMin = ((HiveTimestamp) other).offsetInMin; + } + } + + private void computeOffset(String timezoneID) { + timezoneID = validateTimezoneID(timezoneID); + if (timezoneID != null) { + TimeZone tz = TimeZone.getTimeZone(timezoneID); + offsetInMin = tz.getOffset(getTime()) / 1000 / 60; + } + } + + public Integer getOffsetInMin() { + return offsetInMin; + } + + public void setOffsetInMin(Integer offsetInMin) { + validateOffset(offsetInMin); + this.offsetInMin = offsetInMin; + internalID = null; + } + + public boolean hasTimezone() { + return offsetInMin != null; + } + + private String getTimezoneID() { + if (!hasTimezone()) { + throw new NoSuchElementException("No timezone specified."); + } + if (internalID == null) { + StringBuilder builder = new StringBuilder("GMT"); + if (offsetInMin != 0) { + if (offsetInMin > 0) { + builder.append("+"); + } else { + builder.append("-"); + } + int tmp = offsetInMin > 0 ? offsetInMin : -offsetInMin; + int offsetHour = tmp / 60; + int offsetMin = tmp % 60; + builder.append(String.format("%02d", offsetHour)).append(":"). + append(String.format("%02d", offsetMin)); + } + internalID = builder.toString(); + } + return internalID; + } + + private static String validateTimezoneID(String timezoneID) { + if (timezoneID == null) { + return null; + } + TimeZone tz = TimeZone.getTimeZone(timezoneID); + // We may end up with GMT in case of invalid timezoneID + if (tz.getID().equals("GMT") && !tz.getID().equals(timezoneID)) { + throw new IllegalArgumentException("Unknown timezoneID: " + timezoneID); + } + return timezoneID; + } + + @Override + public String toString() { + String ts = super.toString(); + if (!hasTimezone()) { + return ts; + } + DateFormat dateFormat = threadLocalDateFormat.get(); + TimeZone defaultTZ = dateFormat.getTimeZone(); + try { + String timezoneID = getTimezoneID(); + dateFormat.setTimeZone(TimeZone.getTimeZone(timezoneID)); + String r = dateFormat.format(this) + ts.substring(19); + r += " " + timezoneID; + return r; + } finally { + dateFormat.setTimeZone(defaultTZ); + } + } + + @Override + public int compareTo(Timestamp ts) { + int result = super.compareTo(ts); + if (result == 0) { + if (ts instanceof HiveTimestamp) { + HiveTimestamp hts = (HiveTimestamp) ts; + if (!hasTimezone() || !hts.hasTimezone()) { + if (hasTimezone()) { + result = 1; + } + if (hts.hasTimezone()) { + result = -1; + } + } else { + result = getOffsetInMin() - hts.getOffsetInMin(); + } + } else if (hasTimezone()) { + result = 1; + } + } + return result; + } + + @Override + public boolean equals(Object o) { + if (o instanceof Timestamp) { + return compareTo((Timestamp) o) == 0; + } + return false; + } + + @Override + public int hashCode() { + int hash = super.hashCode(); + if (hasTimezone()) { + hash ^= getOffsetInMin(); + } + return hash; + } + + public static HiveTimestamp valueOf(String timestamp) { + Pair pair = extractTimezoneID(timestamp); + return valueOf(pair.getFirst(), pair.getSecond()); + } + + public static HiveTimestamp valueOf(String timestamp, String timezoneID) { + Timestamp ts = Timestamp.valueOf(timestamp); + timezoneID = validateTimezoneID(timezoneID); + if (timezoneID == null) { + return new HiveTimestamp(ts); + } + DateFormat dateFormat = threadLocalDateFormat.get(); + TimeZone defaultTZ = dateFormat.getTimeZone(); + try { + int nanos = ts.getNanos(); + dateFormat.setTimeZone(TimeZone.getTimeZone(timezoneID)); + Date date = dateFormat.parse(timestamp); + HiveTimestamp hiveTimestamp = new HiveTimestamp(date.getTime(), timezoneID); + hiveTimestamp.setNanos(nanos); + return hiveTimestamp; + } catch (ParseException e) { + throw new IllegalArgumentException(e); + } finally { + dateFormat.setTimeZone(defaultTZ); + } + } + + // parse s into a timestamp with a timezoneID + private static Pair extractTimezoneID(String s) { + s = s.trim(); + int divide = s.indexOf(' '); + if (divide != -1) { + divide = s.indexOf(' ', divide + 1); + if (divide != -1) { + return new Pair<>(s.substring(0, divide), s.substring(divide + 1)); + } + } + return new Pair<>(s, null); + } + + public static boolean isValidOffset(int offsetInMin) { + return offsetInMin >= MIN_OFFSET && offsetInMin <= MAX_OFFSET; + } + + private static void validateOffset(Integer offsetInMin) { + if (offsetInMin != null && !isValidOffset(offsetInMin)) { + throw new IllegalArgumentException("Timezone offset out of range: " + offsetInMin); + } + } +}