diff --git a/common/src/test/org/apache/hadoop/hive/common/type/TestHiveTimestamp.java b/common/src/test/org/apache/hadoop/hive/common/type/TestHiveTimestamp.java
new file mode 100644
index 0000000..5993b2d
--- /dev/null
+++ b/common/src/test/org/apache/hadoop/hive/common/type/TestHiveTimestamp.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.TimeZone;
+import java.util.concurrent.ThreadLocalRandom;
+
+public class TestHiveTimestamp {
+
+ private static TimeZone defaultTZ;
+ private static final String[] IDs = TimeZone.getAvailableIDs();
+
+ @BeforeClass
+ public static void storeDefaultTZ() {
+ defaultTZ = TimeZone.getDefault();
+ }
+
+ @Before
+ public void setTZ() {
+ int index = ThreadLocalRandom.current().nextInt(IDs.length);
+ TimeZone.setDefault(TimeZone.getTimeZone(IDs[index]));
+ }
+
+ @AfterClass
+ public static void restoreTZ() {
+ TimeZone.setDefault(defaultTZ);
+ }
+
+ @Test
+ public void testParse() {
+ // No timezone specified
+ String s1 = "2016-01-03 12:26:34.0123";
+ Assert.assertEquals(s1, HiveTimestamp.valueOf(s1).toString());
+ // With timezone
+ String s2 = s1 + " UTC";
+ Assert.assertEquals(s1 + " GMT", HiveTimestamp.valueOf(s2).toString());
+ Assert.assertEquals(s1 + " GMT+08:00", HiveTimestamp.valueOf(s1, "Asia/Shanghai").toString());
+ }
+
+ @Test
+ public void testHandleDST() {
+ // Same timezone can have different offset due to DST
+ String s1 = "2005-01-03 02:01:00";
+ Assert.assertEquals(s1 + ".0 GMT", HiveTimestamp.valueOf(s1, "Europe/London").toString());
+ String s2 = "2005-06-03 02:01:00.30547";
+ Assert.assertEquals(s2 + " GMT+01:00", HiveTimestamp.valueOf(s2, "Europe/London").toString());
+ // Can print time with DST properly
+ String s3 = "2005-04-03 02:01:00.04067";
+ Assert.assertEquals("2005-04-03 03:01:00.04067 GMT-07:00",
+ HiveTimestamp.valueOf(s3, "America/Los_Angeles").toString());
+ }
+
+ @Test
+ public void testBadZoneID() {
+ try {
+ new HiveTimestamp(0, "Foo id");
+ Assert.fail("Invalid timezone ID should cause exception");
+ } catch (IllegalArgumentException e) {
+ // expected
+ }
+ }
+}
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java
index b90e576..f57bdd6 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java
@@ -20,13 +20,12 @@
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import java.io.OutputStream;
import java.sql.Timestamp;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
import java.util.Date;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveTimestamp;
import org.apache.hadoop.hive.ql.util.TimestampUtils;
import org.apache.hadoop.hive.serde2.ByteStream.RandomAccessOutput;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
@@ -61,21 +60,17 @@
private static final long SEVEN_BYTE_LONG_SIGN_FLIP = 0xff80L << 48;
+ private static final int FOUR_BYTE_INT_SIGN_FLIP = 1 << 31;
- /** The maximum number of bytes required for a TimestampWritable */
- public static final int MAX_BYTES = 13;
+ private static final int TIMEZONE_MASK = 1 << 30;
- public static final int BINARY_SORTABLE_LENGTH = 11;
- private static final ThreadLocal threadLocalDateFormat =
- new ThreadLocal() {
- @Override
- protected DateFormat initialValue() {
- return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- }
- };
+ /** The maximum number of bytes required for a TimestampWritable */
+ public static final int MAX_BYTES = 16;
+
+ public static final int BINARY_SORTABLE_LENGTH = 15;
- private Timestamp timestamp = new Timestamp(0);
+ private Timestamp timestamp = new HiveTimestamp(0);
/**
* true if data is stored in timestamp field rather than byte arrays.
@@ -131,6 +126,9 @@ public void set(Timestamp t) {
if (t == null) {
timestamp.setTime(0);
timestamp.setNanos(0);
+ if (timestamp instanceof HiveTimestamp) {
+ ((HiveTimestamp) timestamp).setOffsetInMin(null);
+ }
return;
}
this.timestamp = t;
@@ -150,7 +148,7 @@ public void set(TimestampWritable t) {
}
}
- public static void updateTimestamp(Timestamp timestamp, long secondsAsMillis, int nanos) {
+ private static void updateTimestamp(Timestamp timestamp, long secondsAsMillis, int nanos) {
((Date) timestamp).setTime(secondsAsMillis);
timestamp.setNanos(nanos);
}
@@ -203,6 +201,36 @@ public int getNanos() {
}
}
+ private Integer getTimezoneOffset() {
+ if (!timestampEmpty) {
+ return timestamp instanceof HiveTimestamp ?
+ ((HiveTimestamp) timestamp).getOffsetInMin() : null;
+ } else if (!bytesEmpty) {
+ return hasDecimalOrSecondVInt() ? getTimezoneOffset(currentBytes, offset + 4) : null;
+ } else {
+ throw new IllegalStateException("Both timestamp and bytes are empty");
+ }
+ }
+
+ // offset should point to the start of decimal field
+ private static Integer getTimezoneOffset(byte[] bytes, final int offset) {
+ if (hasTimezoneOffset(bytes, offset)) {
+ int pos = offset + WritableUtils.decodeVIntSize(bytes[offset]);
+ // skip the 2nd VInt
+ if (hasSecondVInt(bytes[offset])) {
+ pos += WritableUtils.decodeVIntSize(bytes[pos]);
+ }
+ return readVInt(bytes, pos);
+ }
+ return null;
+ }
+
+ private static boolean hasTimezoneOffset(byte[] bytes, int offset) {
+ int val = readVInt(bytes, offset);
+ return (val >= 0 && (val & TIMEZONE_MASK) != 0) ||
+ (val < 0 && (val & TIMEZONE_MASK) == 0);
+ }
+
/**
* @return length of serialized TimestampWritable data. As a side effect, populates the internal
* byte array if empty.
@@ -213,15 +241,19 @@ int getTotalLength() {
}
public static int getTotalLength(byte[] bytes, int offset) {
- int len = 4;
+ int pos = offset + 4;
if (hasDecimalOrSecondVInt(bytes[offset])) {
- int firstVIntLen = WritableUtils.decodeVIntSize(bytes[offset + 4]);
- len += firstVIntLen;
- if (hasSecondVInt(bytes[offset + 4])) {
- len += WritableUtils.decodeVIntSize(bytes[offset + 4 + firstVIntLen]);
+ boolean hasSecondVInt = hasSecondVInt(bytes[pos]);
+ boolean hasTimezoneOffset = hasTimezoneOffset(bytes, pos);
+ pos += WritableUtils.decodeVIntSize(bytes[pos]);
+ if (hasSecondVInt) {
+ pos += WritableUtils.decodeVIntSize(bytes[pos]);
+ }
+ if (hasTimezoneOffset) {
+ pos += WritableUtils.decodeVIntSize(bytes[pos]);
}
}
- return len;
+ return pos - offset;
}
public Timestamp getTimestamp() {
@@ -247,7 +279,7 @@ public Timestamp getTimestamp() {
/**
* @return byte[] representation of TimestampWritable that is binary
- * sortable (7 bytes for seconds, 4 bytes for nanoseconds)
+ * sortable (7 bytes for seconds, 4 bytes for nanoseconds, 4 bytes for timezone offset)
*/
public byte[] getBinarySortable() {
byte[] b = new byte[BINARY_SORTABLE_LENGTH];
@@ -257,6 +289,11 @@ public Timestamp getTimestamp() {
long seconds = getSeconds() ^ SEVEN_BYTE_LONG_SIGN_FLIP;
sevenByteLongToBytes(seconds, b, 0);
intToBytes(nanos, b, 7);
+ Integer tzOffset = getTimezoneOffset();
+ if (tzOffset == null) {
+ tzOffset = HiveTimestamp.NULL_OFFSET;
+ }
+ intToBytes(tzOffset ^ FOUR_BYTE_INT_SIGN_FLIP, b, 11);
return b;
}
@@ -270,20 +307,27 @@ public void setBinarySortable(byte[] bytes, int binSortOffset) {
// Flip the sign bit (and unused bits of the high-order byte) of the seven-byte long back.
long seconds = readSevenByteLong(bytes, binSortOffset) ^ SEVEN_BYTE_LONG_SIGN_FLIP;
int nanos = bytesToInt(bytes, binSortOffset + 7);
+ int tzOffset = bytesToInt(bytes, binSortOffset + 11) ^ FOUR_BYTE_INT_SIGN_FLIP;
+ boolean hasTimezone = HiveTimestamp.isValidOffset(tzOffset);
int firstInt = (int) seconds;
boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE;
- if (nanos != 0 || hasSecondVInt) {
+ if (nanos != 0 || hasSecondVInt || hasTimezone) {
firstInt |= DECIMAL_OR_SECOND_VINT_FLAG;
} else {
firstInt &= LOWEST_31_BITS_OF_SEC_MASK;
}
intToBytes(firstInt, internalBytes, 0);
- setNanosBytes(nanos, internalBytes, 4, hasSecondVInt);
+ setNanosBytes(nanos, internalBytes, 4, hasSecondVInt, hasTimezone);
+ int pos = 4;
if (hasSecondVInt) {
- LazyBinaryUtils.writeVLongToByteArray(internalBytes,
- 4 + WritableUtils.decodeVIntSize(internalBytes[4]),
- seconds >> 31);
+ pos += WritableUtils.decodeVIntSize(internalBytes[pos]);
+ LazyBinaryUtils.writeVLongToByteArray(internalBytes, pos, seconds >> 31);
+ }
+
+ if (hasTimezone) {
+ pos += WritableUtils.decodeVIntSize(internalBytes[pos]);
+ LazyBinaryUtils.writeVLongToByteArray(internalBytes, pos, tzOffset);
}
currentBytes = internalBytes;
@@ -298,7 +342,7 @@ public void setBinarySortable(byte[] bytes, int binSortOffset) {
private void checkBytes() {
if (bytesEmpty) {
// Populate byte[] from Timestamp
- convertTimestampToBytes(timestamp, internalBytes, 0);
+ populateBytes();
offset = 0;
currentBytes = internalBytes;
bytesEmpty = false;
@@ -331,25 +375,26 @@ public void readFields(DataInput in) throws IOException {
in.readFully(internalBytes, 4, 1);
int len = (byte) WritableUtils.decodeVIntSize(internalBytes[4]);
if (len > 1) {
- in.readFully(internalBytes, 5, len-1);
+ in.readFully(internalBytes, 5, len - 1);
}
- long vlong = LazyBinaryUtils.readVLongFromByteArray(internalBytes, 4);
- if (vlong < -1000000000 || vlong > 999999999) {
- throw new IOException(
- "Invalid first vint value (encoded nanoseconds) of a TimestampWritable: " + vlong +
- ", expected to be between -1000000000 and 999999999.");
- // Note that -1000000000 is a valid value corresponding to a nanosecond timestamp
- // of 999999999, because if the second VInt is present, we use the value
- // (-reversedNanoseconds - 1) as the second VInt.
- }
- if (vlong < 0) {
+ int pos = 4 + len;
+ if (hasSecondVInt(internalBytes[4])) {
// This indicates there is a second VInt containing the additional bits of the seconds
// field.
- in.readFully(internalBytes, 4 + len, 1);
- int secondVIntLen = (byte) WritableUtils.decodeVIntSize(internalBytes[4 + len]);
+ in.readFully(internalBytes, pos, 1);
+ int secondVIntLen = (byte) WritableUtils.decodeVIntSize(internalBytes[pos]);
if (secondVIntLen > 1) {
- in.readFully(internalBytes, 5 + len, secondVIntLen - 1);
+ in.readFully(internalBytes, pos + 1, secondVIntLen - 1);
+ }
+ pos += secondVIntLen;
+ }
+
+ if (hasTimezoneOffset(internalBytes, 4)) {
+ in.readFully(internalBytes, pos, 1);
+ int tzOffsetLen = WritableUtils.decodeVIntSize(internalBytes[pos]);
+ if (tzOffsetLen > 1) {
+ in.readFully(internalBytes, pos + 1, tzOffsetLen - 1);
}
}
}
@@ -362,6 +407,7 @@ public void write(DataOutput out) throws IOException {
out.write(currentBytes, offset, getTotalLength());
}
+ @Override
public int compareTo(TimestampWritable t) {
checkBytes();
long s1 = this.getSeconds();
@@ -370,7 +416,18 @@ public int compareTo(TimestampWritable t) {
int n1 = this.getNanos();
int n2 = t.getNanos();
if (n1 == n2) {
- return 0;
+ Integer tz1 = getTimezoneOffset();
+ Integer tz2 = t.getTimezoneOffset();
+ if (tz1 == null || tz2 == null) {
+ if (tz1 != null) {
+ return 1;
+ }
+ if (tz2 != null) {
+ return -1;
+ }
+ return 0;
+ }
+ return tz1 - tz2;
}
return n1 - n2;
} else {
@@ -391,15 +448,14 @@ public String toString() {
String timestampString = timestamp.toString();
if (timestampString.length() > 19) {
- if (timestampString.length() == 21) {
- if (timestampString.substring(19).compareTo(".0") == 0) {
- return threadLocalDateFormat.get().format(timestamp);
+ if (timestampString.substring(19, 21).compareTo(".0") == 0) {
+ if (timestampString.length() == 21 || !Character.isDigit(timestampString.charAt(21))) {
+ timestampString = timestampString.substring(0, 19) + timestampString.substring(21);
}
}
- return threadLocalDateFormat.get().format(timestamp) + timestampString.substring(19);
}
- return threadLocalDateFormat.get().format(timestamp);
+ return timestampString;
}
@Override
@@ -407,7 +463,12 @@ public int hashCode() {
long seconds = getSeconds();
seconds <<= 30; // the nanosecond part fits in 30 bits
seconds |= getNanos();
- return (int) ((seconds >>> 32) ^ seconds);
+ Integer tzOffset = getTimezoneOffset();
+ int hash = (int) ((seconds >>> 32) ^ seconds);
+ if (tzOffset != null) {
+ hash ^= tzOffset;
+ }
+ return hash;
}
private void populateTimestamp() {
@@ -415,6 +476,13 @@ private void populateTimestamp() {
int nanos = getNanos();
timestamp.setTime(seconds * 1000);
timestamp.setNanos(nanos);
+ Integer tzOffset = getTimezoneOffset();
+ if (timestamp instanceof HiveTimestamp) {
+ ((HiveTimestamp) timestamp).setOffsetInMin(tzOffset);
+ } else if (tzOffset != null) {
+ timestamp = new HiveTimestamp(timestamp);
+ ((HiveTimestamp) timestamp).setOffsetInMin(tzOffset);
+ }
}
/** Static methods **/
@@ -441,13 +509,14 @@ public static long getSeconds(byte[] bytes, int offset) {
}
public static int getNanos(byte[] bytes, int offset) {
- VInt vInt = LazyBinaryUtils.threadLocalVInt.get();
- LazyBinaryUtils.readVInt(bytes, offset, vInt);
- int val = vInt.value;
+ int val = readVInt(bytes, offset);
if (val < 0) {
+ val |= TIMEZONE_MASK;
// This means there is a second VInt present that specifies additional bits of the timestamp.
// The reversed nanoseconds value is still encoded in this VInt.
val = -val - 1;
+ } else {
+ val &= ~TIMEZONE_MASK;
}
int len = (int) Math.floor(Math.log10(val)) + 1;
@@ -466,33 +535,43 @@ public static int getNanos(byte[] bytes, int offset) {
return val;
}
+ private static int readVInt(byte[] bytes, int offset) {
+ VInt vInt = LazyBinaryUtils.threadLocalVInt.get();
+ LazyBinaryUtils.readVInt(bytes, offset, vInt);
+ return vInt.value;
+ }
+
/**
- * Writes a Timestamp's serialized value to byte array b at the given offset
- * @param t to convert to bytes
- * @param b destination byte array
- * @param offset destination offset in the byte array
+ * Writes the Timestamp's serialized value to the internal byte array.
*/
- public static void convertTimestampToBytes(Timestamp t, byte[] b,
- int offset) {
- long millis = t.getTime();
- int nanos = t.getNanos();
+ private void populateBytes() {
+ long millis = timestamp.getTime();
+ int nanos = timestamp.getNanos();
+ boolean hasTimezone = timestamp instanceof HiveTimestamp &&
+ ((HiveTimestamp) timestamp).hasTimezone();
long seconds = TimestampUtils.millisToSeconds(millis);
boolean hasSecondVInt = seconds < 0 || seconds > Integer.MAX_VALUE;
- boolean hasDecimal = setNanosBytes(nanos, b, offset+4, hasSecondVInt);
+ int position = 4;
+ boolean hasDecimal = setNanosBytes(nanos, internalBytes, position, hasSecondVInt, hasTimezone);
int firstInt = (int) seconds;
- if (hasDecimal || hasSecondVInt) {
+ if (hasDecimal || hasSecondVInt || hasTimezone) {
firstInt |= DECIMAL_OR_SECOND_VINT_FLAG;
} else {
firstInt &= LOWEST_31_BITS_OF_SEC_MASK;
}
- intToBytes(firstInt, b, offset);
+ intToBytes(firstInt, internalBytes, 0);
if (hasSecondVInt) {
- LazyBinaryUtils.writeVLongToByteArray(b,
- offset + 4 + WritableUtils.decodeVIntSize(b[offset + 4]),
- seconds >> 31);
+ position += WritableUtils.decodeVIntSize(internalBytes[position]);
+ LazyBinaryUtils.writeVLongToByteArray(internalBytes, position, seconds >> 31);
+ }
+
+ if (hasTimezone) {
+ position += WritableUtils.decodeVIntSize(internalBytes[position]);
+ LazyBinaryUtils.writeVLongToByteArray(internalBytes, position,
+ ((HiveTimestamp) timestamp).getOffsetInMin());
}
}
@@ -505,7 +584,8 @@ public static void convertTimestampToBytes(Timestamp t, byte[] b,
* @param offset
* @return
*/
- private static boolean setNanosBytes(int nanos, byte[] b, int offset, boolean hasSecondVInt) {
+ private static boolean setNanosBytes(int nanos, byte[] b, int offset,
+ boolean hasSecondVInt, boolean hasTimezone) {
int decimal = 0;
if (nanos != 0) {
int counter = 0;
@@ -517,10 +597,24 @@ private static boolean setNanosBytes(int nanos, byte[] b, int offset, boolean ha
}
}
- if (hasSecondVInt || decimal != 0) {
+ if (hasSecondVInt || decimal != 0 || hasTimezone) {
// We use the sign of the reversed-nanoseconds field to indicate that there is a second VInt
// present.
- LazyBinaryUtils.writeVLongToByteArray(b, offset, hasSecondVInt ? (-decimal - 1) : decimal);
+ int toWrite = decimal;
+ if (hasSecondVInt) {
+ toWrite = -toWrite - 1;
+ }
+ // Decimal ranges in [-1000000000, 999999999]. Use the second MSB to indicate if
+ // timezone is present.
+ // if toWrite >= 0, second MSB is always 0, otherwise it's always 1
+ if (hasTimezone) {
+ if (toWrite >= 0) {
+ toWrite |= TIMEZONE_MASK;
+ } else {
+ toWrite &= ~TIMEZONE_MASK;
+ }
+ }
+ LazyBinaryUtils.writeVLongToByteArray(b, offset, toWrite);
}
return decimal != 0;
}
@@ -552,24 +646,20 @@ public static Timestamp longToTimestamp(long time, boolean intToTimestampInSecon
}
public static void setTimestamp(Timestamp t, byte[] bytes, int offset) {
- boolean hasDecimalOrSecondVInt = hasDecimalOrSecondVInt(bytes[offset]);
- long seconds = (long) TimestampWritable.getSeconds(bytes, offset);
- int nanos = 0;
- if (hasDecimalOrSecondVInt) {
- nanos = TimestampWritable.getNanos(bytes, offset + 4);
- if (hasSecondVInt(bytes[offset + 4])) {
- seconds += LazyBinaryUtils.readVLongFromByteArray(bytes,
- offset + 4 + WritableUtils.decodeVIntSize(bytes[offset + 4]));
- }
- }
+ long seconds = getSeconds(bytes, offset);
+ int nanos = getNanos(bytes, offset + 4);
t.setTime(seconds * 1000);
- if (nanos != 0) {
- t.setNanos(nanos);
+ t.setNanos(nanos);
+ Integer tzOffset = getTimezoneOffset(bytes, offset + 4);
+ if (t instanceof HiveTimestamp) {
+ ((HiveTimestamp) t).setOffsetInMin(tzOffset);
+ } else {
+ Preconditions.checkArgument(tzOffset == null);
}
}
public static Timestamp createTimestamp(byte[] bytes, int offset) {
- Timestamp t = new Timestamp(0);
+ Timestamp t = new HiveTimestamp(0);
TimestampWritable.setTimestamp(t, bytes, offset);
return t;
}
@@ -586,12 +676,6 @@ private final boolean hasDecimalOrSecondVInt() {
return hasDecimalOrSecondVInt(currentBytes[offset]);
}
- public final boolean hasDecimal() {
- return hasDecimalOrSecondVInt() || currentBytes[offset + 4] != -1;
- // If the first byte of the VInt is -1, the VInt itself is -1, indicating that there is a
- // second VInt but the nanoseconds field is actually 0.
- }
-
/**
* Writes value into dest at offset
* @param value
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java b/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java
index 7619efa..e740b91 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampWritable.java
@@ -493,4 +493,30 @@ public void testBinarySortable() {
}
}
+ @Test
+ public void test2ndMSBOfDecimal() {
+ // The current decimal part ranges in [-1000000000, 999999999]. We should be able to use its
+ // second MSB to indicate if a timezone offset exists
+ int decimal = -1000000000;
+ final int mask = 1 << 30;
+ while (decimal < 0) {
+ assertTrue((decimal & mask) != 0);
+ decimal++;
+ }
+ while (decimal <= 999999999) {
+ assertTrue((decimal & mask) == 0);
+ decimal++;
+ }
+ }
+
+ @Test
+ public void testSetTimestamp() {
+ Timestamp t1 = new Timestamp((1L << 32));
+ TimestampWritable writable = new TimestampWritable(t1);
+ byte[] bytes = writable.getBytes();
+ Timestamp t2 = new Timestamp(0);
+ TimestampWritable.setTimestamp(t2, bytes, 0);
+ assertEquals(t1, t2);
+ }
+
}
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveTimestamp.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveTimestamp.java
new file mode 100644
index 0000000..5341c53
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveTimestamp.java
@@ -0,0 +1,236 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import org.apache.commons.math3.util.Pair;
+
+import java.sql.Timestamp;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.NoSuchElementException;
+import java.util.TimeZone;
+
+/**
+ * A thin wrapper of java.sql.Timestamp, with timezoneID offset.
+ * Any timestamp that requires a specific timezone should use this type.
+ */
+public class HiveTimestamp extends Timestamp {
+ private static final ThreadLocal threadLocalDateFormat =
+ new ThreadLocal() {
+ @Override
+ protected DateFormat initialValue() {
+ return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ }
+ };
+
+ // We store the offset from UTC in minutes . Ranges from [-12:00, 14:00].
+ private Integer offsetInMin = null;
+
+ private transient String internalID = null;
+
+ private static final int MAX_OFFSET = 840;
+ private static final int MIN_OFFSET = -720;
+
+ // Used to indicate no offset is present
+ public static final int NULL_OFFSET = -800;
+
+ public HiveTimestamp(long time, String timezoneID) {
+ super(time);
+ computeOffset(timezoneID);
+ }
+
+ public HiveTimestamp(long time) {
+ this(time, null);
+ }
+
+ public HiveTimestamp(Timestamp other) {
+ this(other.getTime());
+ setNanos(other.getNanos());
+ if (other instanceof HiveTimestamp) {
+ this.offsetInMin = ((HiveTimestamp) other).offsetInMin;
+ }
+ }
+
+ private void computeOffset(String timezoneID) {
+ timezoneID = validateTimezoneID(timezoneID);
+ if (timezoneID != null) {
+ TimeZone tz = TimeZone.getTimeZone(timezoneID);
+ offsetInMin = tz.getOffset(getTime()) / 1000 / 60;
+ }
+ }
+
+ public Integer getOffsetInMin() {
+ return offsetInMin;
+ }
+
+ public void setOffsetInMin(Integer offsetInMin) {
+ validateOffset(offsetInMin);
+ this.offsetInMin = offsetInMin;
+ internalID = null;
+ }
+
+ public boolean hasTimezone() {
+ return offsetInMin != null;
+ }
+
+ private String getTimezoneID() {
+ if (!hasTimezone()) {
+ throw new NoSuchElementException("No timezone specified.");
+ }
+ if (internalID == null) {
+ StringBuilder builder = new StringBuilder("GMT");
+ if (offsetInMin != 0) {
+ if (offsetInMin > 0) {
+ builder.append("+");
+ } else {
+ builder.append("-");
+ }
+ int tmp = offsetInMin > 0 ? offsetInMin : -offsetInMin;
+ int offsetHour = tmp / 60;
+ int offsetMin = tmp % 60;
+ builder.append(String.format("%02d", offsetHour)).append(":").
+ append(String.format("%02d", offsetMin));
+ }
+ internalID = builder.toString();
+ }
+ return internalID;
+ }
+
+ private static String validateTimezoneID(String timezoneID) {
+ if (timezoneID == null) {
+ return null;
+ }
+ TimeZone tz = TimeZone.getTimeZone(timezoneID);
+ // We may end up with GMT in case of invalid timezoneID
+ if (tz.getID().equals("GMT") && !tz.getID().equals(timezoneID)) {
+ throw new IllegalArgumentException("Unknown timezoneID: " + timezoneID);
+ }
+ return timezoneID;
+ }
+
+ @Override
+ public String toString() {
+ String ts = super.toString();
+ if (!hasTimezone()) {
+ return ts;
+ }
+ DateFormat dateFormat = threadLocalDateFormat.get();
+ TimeZone defaultTZ = dateFormat.getTimeZone();
+ try {
+ String timezoneID = getTimezoneID();
+ dateFormat.setTimeZone(TimeZone.getTimeZone(timezoneID));
+ String r = dateFormat.format(this) + ts.substring(19);
+ r += " " + timezoneID;
+ return r;
+ } finally {
+ dateFormat.setTimeZone(defaultTZ);
+ }
+ }
+
+ @Override
+ public int compareTo(Timestamp ts) {
+ int result = super.compareTo(ts);
+ if (result == 0) {
+ if (ts instanceof HiveTimestamp) {
+ HiveTimestamp hts = (HiveTimestamp) ts;
+ if (!hasTimezone() || !hts.hasTimezone()) {
+ if (hasTimezone()) {
+ result = 1;
+ }
+ if (hts.hasTimezone()) {
+ result = -1;
+ }
+ } else {
+ result = getOffsetInMin() - hts.getOffsetInMin();
+ }
+ } else if (hasTimezone()) {
+ result = 1;
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o instanceof Timestamp) {
+ return compareTo((Timestamp) o) == 0;
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ int hash = super.hashCode();
+ if (hasTimezone()) {
+ hash ^= getOffsetInMin();
+ }
+ return hash;
+ }
+
+ public static HiveTimestamp valueOf(String timestamp) {
+ Pair pair = extractTimezoneID(timestamp);
+ return valueOf(pair.getFirst(), pair.getSecond());
+ }
+
+ public static HiveTimestamp valueOf(String timestamp, String timezoneID) {
+ Timestamp ts = Timestamp.valueOf(timestamp);
+ timezoneID = validateTimezoneID(timezoneID);
+ if (timezoneID == null) {
+ return new HiveTimestamp(ts);
+ }
+ DateFormat dateFormat = threadLocalDateFormat.get();
+ TimeZone defaultTZ = dateFormat.getTimeZone();
+ try {
+ int nanos = ts.getNanos();
+ dateFormat.setTimeZone(TimeZone.getTimeZone(timezoneID));
+ Date date = dateFormat.parse(timestamp);
+ HiveTimestamp hiveTimestamp = new HiveTimestamp(date.getTime(), timezoneID);
+ hiveTimestamp.setNanos(nanos);
+ return hiveTimestamp;
+ } catch (ParseException e) {
+ throw new IllegalArgumentException(e);
+ } finally {
+ dateFormat.setTimeZone(defaultTZ);
+ }
+ }
+
+ // parse s into a timestamp with a timezoneID
+ private static Pair extractTimezoneID(String s) {
+ s = s.trim();
+ int divide = s.indexOf(' ');
+ if (divide != -1) {
+ divide = s.indexOf(' ', divide + 1);
+ if (divide != -1) {
+ return new Pair<>(s.substring(0, divide), s.substring(divide + 1));
+ }
+ }
+ return new Pair<>(s, null);
+ }
+
+ public static boolean isValidOffset(int offsetInMin) {
+ return offsetInMin >= MIN_OFFSET && offsetInMin <= MAX_OFFSET;
+ }
+
+ private static void validateOffset(Integer offsetInMin) {
+ if (offsetInMin != null && !isValidOffset(offsetInMin)) {
+ throw new IllegalArgumentException("Timezone offset out of range: " + offsetInMin);
+ }
+ }
+}