diff --git ql/pom.xml ql/pom.xml
index 3632a5efe4..aa23d5e6e8 100644
--- ql/pom.xml
+++ ql/pom.xml
@@ -98,6 +98,11 @@
hive-spark-client
${project.version}
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-storage-api</artifactId>
+      <version>${project.version}</version>
+    </dependency>
com.esotericsoftware
diff --git serde/pom.xml serde/pom.xml
index 9f1b146d02..e97551bbeb 100644
--- serde/pom.xml
+++ serde/pom.xml
@@ -49,6 +49,11 @@
hive-shims
${project.version}
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-storage-api</artifactId>
+      <version>${project.version}</version>
+    </dependency>
com.google.code.findbugs
diff --git storage-api/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java storage-api/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
new file mode 100644
index 0000000000..c73d02eed0
--- /dev/null
+++ storage-api/src/java/org/apache/hadoop/hive/common/type/CalendarUtils.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Conversion utilities from the hybrid Julian/Gregorian calendar to/from the
+ * proleptic Gregorian.
+ *
+ * The semantics here are to hold the string representation constant and change
+ * the epoch offset rather than holding the instant in time constant and
+ * changing the string representation.
+ *
+ * These utilities are fast for the common case (on or after 1582-10-15), but
+ * slow for older dates. Formatters are kept per thread.
+ */
+public class CalendarUtils {
+
+  public static final long SWITCHOVER_MILLIS; // epoch millis of 1582-10-15 00:00 UTC
+  public static final long SWITCHOVER_DAYS;   // epoch day of 1582-10-15
+
+  // Builds a UTC formatter; a proleptic one applies the Gregorian rules to all dates.
+  private static SimpleDateFormat createFormatter(String fmt, boolean proleptic) {
+    SimpleDateFormat result = new SimpleDateFormat(fmt);
+    GregorianCalendar calendar = new GregorianCalendar(UTC);
+    if (proleptic) {
+      calendar.setGregorianChange(new Date(Long.MIN_VALUE));
+    }
+    result.setCalendar(calendar);
+    return result;
+  }
+
+  private static final String DATE = "yyyy-MM-dd";
+  private static final String TIME = DATE + " HH:mm:ss.SSS";
+  private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
+  private static final ThreadLocal<SimpleDateFormat> HYBRID_DATE_FORMAT =
+      ThreadLocal.withInitial(() -> createFormatter(DATE, false));
+  private static final ThreadLocal<SimpleDateFormat> HYBRID_TIME_FORMAT =
+      ThreadLocal.withInitial(() -> createFormatter(TIME, false));
+
+  private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_DATE_FORMAT =
+      ThreadLocal.withInitial(() -> createFormatter(DATE, true));
+  private static final ThreadLocal<SimpleDateFormat> PROLEPTIC_TIME_FORMAT =
+      ThreadLocal.withInitial(() -> createFormatter(TIME, true));
+
+  static {
+    // Get the first day from which the two calendars agree with each other.
+    try {
+      SWITCHOVER_MILLIS = HYBRID_DATE_FORMAT.get().parse("1582-10-15").getTime();
+      SWITCHOVER_DAYS = TimeUnit.MILLISECONDS.toDays(SWITCHOVER_MILLIS);
+    } catch (ParseException e) {
+      throw new IllegalArgumentException("Can't parse switch over date", e);
+    }
+  }
+
+  /**
+   * Convert an epoch day from the hybrid Julian/Gregorian calendar to the
+   * proleptic Gregorian.
+   * @param hybrid day of epoch in the hybrid Julian/Gregorian
+   * @return day of epoch in the proleptic Gregorian
+   */
+  public static int convertDateToProleptic(int hybrid) {
+    if (hybrid >= SWITCHOVER_DAYS) {
+      return hybrid; // both calendars agree from the switchover on
+    }
+    String dateStr = HYBRID_DATE_FORMAT.get().format(
+        new Date(TimeUnit.DAYS.toMillis(hybrid)));
+    try {
+      return (int) TimeUnit.MILLISECONDS.toDays(
+          PROLEPTIC_DATE_FORMAT.get().parse(dateStr).getTime());
+    } catch (ParseException e) {
+      throw new IllegalArgumentException("Can't parse " + dateStr, e);
+    }
+  }
+
+  /**
+   * Convert an epoch day from the proleptic Gregorian calendar to the hybrid
+   * Julian/Gregorian.
+   * @param proleptic day of epoch in the proleptic Gregorian
+   * @return day of epoch in the hybrid Julian/Gregorian
+   */
+  public static int convertDateToHybrid(int proleptic) {
+    if (proleptic >= SWITCHOVER_DAYS) {
+      return proleptic; // both calendars agree from the switchover on
+    }
+    String dateStr = PROLEPTIC_DATE_FORMAT.get().format(
+        new Date(TimeUnit.DAYS.toMillis(proleptic)));
+    try {
+      return (int) TimeUnit.MILLISECONDS.toDays(
+          HYBRID_DATE_FORMAT.get().parse(dateStr).getTime());
+    } catch (ParseException e) {
+      throw new IllegalArgumentException("Can't parse " + dateStr, e);
+    }
+  }
+
+  /**
+   * Convert an epoch day between the two calendars; a no-op if both flags match.
+   * @param original day of epoch in the source calendar
+   * @param fromProleptic true if the source calendar is the proleptic Gregorian
+   * @param toProleptic true if the target calendar is the proleptic Gregorian
+   * @return day of epoch in the target calendar
+   */
+  public static int convertDate(int original, boolean fromProleptic, boolean toProleptic) {
+    if (fromProleptic == toProleptic) {
+      return original;
+    }
+    return toProleptic ? convertDateToProleptic(original) : convertDateToHybrid(original);
+  }
+
+  /**
+   * Convert epoch millis between the two calendars; a no-op if both flags match.
+   * @param original millis of epoch in the source calendar
+   * @param fromProleptic true if the source calendar is the proleptic Gregorian
+   * @param toProleptic true if the target calendar is the proleptic Gregorian
+   * @return millis of epoch in the target calendar
+   */
+  public static long convertTime(long original, boolean fromProleptic, boolean toProleptic) {
+    if (fromProleptic == toProleptic) {
+      return original;
+    }
+    return toProleptic ? convertTimeToProleptic(original) : convertTimeToHybrid(original);
+  }
+
+  /**
+   * Convert epoch millis from the hybrid Julian/Gregorian calendar to the
+   * proleptic Gregorian.
+   * @param hybrid millis of epoch in the hybrid Julian/Gregorian
+   * @return millis of epoch in the proleptic Gregorian
+   */
+  public static long convertTimeToProleptic(long hybrid) {
+    if (hybrid >= SWITCHOVER_MILLIS) {
+      return hybrid; // both calendars agree from the switchover on
+    }
+    String dateStr = HYBRID_TIME_FORMAT.get().format(new Date(hybrid));
+    try {
+      return PROLEPTIC_TIME_FORMAT.get().parse(dateStr).getTime();
+    } catch (ParseException e) {
+      throw new IllegalArgumentException("Can't parse " + dateStr, e);
+    }
+  }
+
+  /**
+   * Convert epoch millis from the proleptic Gregorian calendar to the hybrid
+   * Julian/Gregorian.
+   * @param proleptic millis of epoch in the proleptic Gregorian
+   * @return millis of epoch in the hybrid Julian/Gregorian
+   */
+  public static long convertTimeToHybrid(long proleptic) {
+    if (proleptic >= SWITCHOVER_MILLIS) {
+      return proleptic; // both calendars agree from the switchover on
+    }
+    String dateStr = PROLEPTIC_TIME_FORMAT.get().format(new Date(proleptic));
+    try {
+      return HYBRID_TIME_FORMAT.get().parse(dateStr).getTime();
+    } catch (ParseException e) {
+      throw new IllegalArgumentException("Can't parse " + dateStr, e);
+    }
+  }
+
+  /**
+   * Formats epoch day to date according to proleptic or hybrid calendar.
+   * @param epochDay epoch day
+   * @param useProleptic if true - uses proleptic formatter, else uses hybrid formatter
+   * @return date formatted as yyyy-MM-dd
+   */
+  public static String formatDate(long epochDay, boolean useProleptic) {
+    long millis = TimeUnit.DAYS.toMillis(epochDay);
+    return useProleptic ? PROLEPTIC_DATE_FORMAT.get().format(millis)
+        : HYBRID_DATE_FORMAT.get().format(millis);
+  }
+
+  private CalendarUtils() {
+    throw new UnsupportedOperationException(); // static utility class, not instantiable
+  }
+}
\ No newline at end of file
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java
index 3dac667f5d..6608a6eac3 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java
@@ -17,37 +17,13 @@
*/
package org.apache.hadoop.hive.ql.exec.vector;
-import java.text.SimpleDateFormat;
-import java.util.GregorianCalendar;
-import java.util.TimeZone;
-import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.hive.common.type.CalendarUtils;
/**
* This class extends LongColumnVector in order to introduce some date-specific semantics. In
* DateColumnVector, the elements of vector[] represent the days since 1970-01-01
*/
public class DateColumnVector extends LongColumnVector {
- private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
- private static final GregorianCalendar PROLEPTIC_GREGORIAN_CALENDAR = new GregorianCalendar(UTC);
- private static final GregorianCalendar GREGORIAN_CALENDAR = new GregorianCalendar(UTC);
-
- private static final SimpleDateFormat PROLEPTIC_GREGORIAN_DATE_FORMATTER =
- new SimpleDateFormat("yyyy-MM-dd");
- private static final SimpleDateFormat GREGORIAN_DATE_FORMATTER =
- new SimpleDateFormat("yyyy-MM-dd");
-
- /**
- * -141427: hybrid: 1582-10-15 proleptic: 1582-10-15
- * -141428: hybrid: 1582-10-04 proleptic: 1582-10-14
- */
- private static final int CUTOVER_DAY_EPOCH = -141427; // it's 1582-10-15 in both calendars
-
- static {
- PROLEPTIC_GREGORIAN_CALENDAR.setGregorianChange(new java.util.Date(Long.MIN_VALUE));
-
- PROLEPTIC_GREGORIAN_DATE_FORMATTER.setCalendar(PROLEPTIC_GREGORIAN_CALENDAR);
- GREGORIAN_DATE_FORMATTER.setCalendar(GREGORIAN_CALENDAR);
- }
private boolean usingProlepticCalendar = false;
@@ -76,24 +52,16 @@ public void changeCalendar(boolean useProleptic, boolean updateData) {
private void updateDataAccordingProlepticSetting() throws Exception {
for (int i = 0; i < vector.length; i++) {
- if (vector[i] >= CUTOVER_DAY_EPOCH) { // no need for conversion
+ if (vector[i] >= CalendarUtils.SWITCHOVER_DAYS) { // no need for conversion
continue;
}
- long millis = TimeUnit.DAYS.toMillis(vector[i]);
- String originalFormatted = usingProlepticCalendar ? GREGORIAN_DATE_FORMATTER.format(millis)
- : PROLEPTIC_GREGORIAN_DATE_FORMATTER.format(millis);
-
- millis = (usingProlepticCalendar ? PROLEPTIC_GREGORIAN_DATE_FORMATTER.parse(originalFormatted)
- : GREGORIAN_DATE_FORMATTER.parse(originalFormatted)).getTime();
-
- vector[i] = TimeUnit.MILLISECONDS.toDays(millis);
+ vector[i] = usingProlepticCalendar ? CalendarUtils.convertDateToProleptic((int) vector[i]) : CalendarUtils
+ .convertDateToHybrid((int) vector[i]);
}
}
public String formatDate(int i) {
- long millis = TimeUnit.DAYS.toMillis(vector[i]);
- return usingProlepticCalendar ? PROLEPTIC_GREGORIAN_DATE_FORMATTER.format(millis)
- : GREGORIAN_DATE_FORMATTER.format(millis);
+ return CalendarUtils.formatDate(vector[i], usingProlepticCalendar);
}
public DateColumnVector setUsingProlepticCalendar(boolean usingProlepticCalendar) {
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
index d5dfc9295a..7807e69ffe 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -18,14 +18,12 @@
package org.apache.hadoop.hive.ql.exec.vector;
import java.sql.Timestamp;
-import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.Arrays;
-import java.util.GregorianCalendar;
-import java.util.TimeZone;
+import org.apache.hadoop.hive.common.type.CalendarUtils;
import org.apache.hadoop.io.Writable;
/**
@@ -41,26 +39,6 @@
* using the scratch timestamp, and then perhaps update the column vector row with a result.
*/
public class TimestampColumnVector extends ColumnVector {
- private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
- private static final GregorianCalendar PROLEPTIC_GREGORIAN_CALENDAR_UTC =
- new GregorianCalendar(UTC);
- private static final GregorianCalendar GREGORIAN_CALENDAR_UTC =
- new GregorianCalendar(UTC);
-
- private static final SimpleDateFormat PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- private static final SimpleDateFormat GREGORIAN_TIMESTAMP_FORMATTER_UTC =
- new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-
- static {
- PROLEPTIC_GREGORIAN_CALENDAR_UTC.setGregorianChange(new java.util.Date(Long.MIN_VALUE));
-
- PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC.setCalendar(PROLEPTIC_GREGORIAN_CALENDAR_UTC);
- GREGORIAN_TIMESTAMP_FORMATTER_UTC.setCalendar(GREGORIAN_CALENDAR_UTC);
- }
-
- // it's 1582-10-15 in both calendars
- private static final int CUTOVER_MILLIS_EPOCH = -141427 * 24 * 60 * 60 * 1000;
/*
* The storage arrays for this column vector corresponds to the storage of a Timestamp:
@@ -594,18 +572,14 @@ public void changeCalendar(boolean useProleptic, boolean updateData) {
private void updateDataAccordingProlepticSetting() throws Exception {
for (int i = 0; i < nanos.length; i++) {
- if (time[i] >= CUTOVER_MILLIS_EPOCH) { // no need for conversion
+ if (time[i] >= CalendarUtils.SWITCHOVER_MILLIS) { // no need for conversion
continue;
}
asScratchTimestamp(i);
long offset = 0;
- String formatted =
- usingProlepticCalendar ? GREGORIAN_TIMESTAMP_FORMATTER_UTC.format(scratchTimestamp)
- : PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC.format(scratchTimestamp);
- long millis = usingProlepticCalendar
- ? PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC.parse(formatted).getTime()
- : GREGORIAN_TIMESTAMP_FORMATTER_UTC.parse(formatted).getTime();
+ long millis = usingProlepticCalendar ? CalendarUtils.convertTimeToProleptic(scratchTimestamp.getTime())
+ : CalendarUtils.convertTimeToHybrid(scratchTimestamp.getTime());
Timestamp newTimeStamp = Timestamp.from(Instant.ofEpochMilli(millis));
diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java
index 0d4dc5dc38..d45822d172 100644
--- storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java
+++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java
@@ -20,6 +20,9 @@
import org.junit.Assert;
import org.junit.Test;
+import java.util.ArrayList;
+import java.util.List;
+
public class TestDateColumnVector {
/**
* Test case for DateColumnVector's changeCalendar
@@ -77,4 +80,44 @@ private void setDateAndVerifyProlepticUpdate(long longDay, String expectedDateSt
" new = " + newUseProleptic,
expectedDateString, dateColumnVector.formatDate(0));
}
+
+  @Test(timeout = 300_000)
+  public void testMultiThreaded() throws Exception {
+    // When the SimpleDateFormat/GregorianCalendar instances were shared between threads, the race
+    // used to throw exceptions like -
+    // 1) java.lang.NumberFormatException: For input string: "" OR
+    //    java.lang.NumberFormatException: For input string: ".821582E.821582E44"
+    // 2) Caused by: java.lang.ArrayIndexOutOfBoundsException: -5325980
+    //    at sun.util.calendar.BaseCalendar.getCalendarDateFromFixedDate(BaseCalendar.java:453)
+    //    at java.util.GregorianCalendar.computeFields(GregorianCalendar.java:2397)
+    // Create 5 threads and start manipulating vectors; none of them should throw now.
+    List<Thread> threads = new ArrayList<>();
+    threads.add(startVectorManipulationThread(50000, -141428));
+    threads.add(startVectorManipulationThread(50000, -141430));
+    threads.add(startVectorManipulationThread(50000, -16768));
+    threads.add(startVectorManipulationThread(50000, -499952));
+    threads.add(startVectorManipulationThread(50000, -499955));
+    for (Thread thread : threads) {
+      thread.join();
+    }
+    // join() does not report exceptions thrown inside a worker, so check for them explicitly.
+    Assert.assertTrue("Worker threads failed: " + workerFailures, workerFailures.isEmpty());
+  }
+
+  // Exceptions that escaped the worker threads started by startVectorManipulationThread.
+  private final List<Throwable> workerFailures = java.util.Collections.synchronizedList(new ArrayList<>());
+
+  private Thread startVectorManipulationThread(final int vectorLength, final int epochDay) {
+    Thread thread = new Thread(() -> {
+      DateColumnVector columnVector = new DateColumnVector(vectorLength).setUsingProlepticCalendar(true);
+      for (int i = 0; i < vectorLength; i++) {
+        columnVector.vector[i] = epochDay;
+      }
+      columnVector.changeCalendar(false, true);
+    });
+    thread.setUncaughtExceptionHandler((t, e) -> workerFailures.add(e));
+    thread.start();
+    return thread;
+  }
+
}
diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
index 333a5b57ad..2d85b115d2 100644
--- storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
+++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
@@ -24,7 +24,9 @@
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.Instant;
+import java.util.ArrayList;
import java.util.GregorianCalendar;
+import java.util.List;
import java.util.Random;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
@@ -210,4 +212,38 @@ private DateFormat getTestFormatter(boolean useProleptic) {
return testFormatter;
}
+
+
+  @Test(timeout = 300_000)
+  public void testMultiThreaded() throws Exception {
+    // Like TestDateColumnVector#testMultiThreaded, with the days turned into millis so conversion runs.
+    List<Thread> threads = new ArrayList<>();
+    threads.add(startVectorManipulationThread(50000, TimeUnit.DAYS.toMillis(-141428)));
+    threads.add(startVectorManipulationThread(50000, TimeUnit.DAYS.toMillis(-141430)));
+    threads.add(startVectorManipulationThread(50000, TimeUnit.DAYS.toMillis(-16768)));
+    threads.add(startVectorManipulationThread(50000, TimeUnit.DAYS.toMillis(-499952)));
+    threads.add(startVectorManipulationThread(50000, TimeUnit.DAYS.toMillis(-499955)));
+    for (Thread thread : threads) {
+      thread.join();
+    }
+    // join() does not report exceptions thrown inside a worker, so check for them explicitly.
+    org.junit.Assert.assertTrue("Worker threads failed: " + workerFailures, workerFailures.isEmpty());
+  }
+
+  private final List<Throwable> workerFailures = java.util.Collections.synchronizedList(new ArrayList<>());
+
+  private Thread startVectorManipulationThread(final int vectorLength, final long millis) {
+    Thread thread = new Thread(() -> {
+      TimestampColumnVector columnVector = new TimestampColumnVector(vectorLength).setUsingProlepticCalendar(true);
+      for (int i = 0; i < vectorLength; i++) {
+        columnVector.time[i] = millis;
+        columnVector.nanos[i] = 1;
+      }
+      columnVector.changeCalendar(false, true);
+    });
+    thread.setUncaughtExceptionHandler((t, e) -> workerFailures.add(e));
+    thread.start();
+    return thread;
+  }
+
}