diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java index 8a072326e3936122383325e25095ba42aa966c98..461711f8e39d168c6b14f31e50b151e776cfdead 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java @@ -20,11 +20,11 @@ import org.apache.hadoop.hive.common.format.datetime.HiveSqlDateTimeFormatter; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import java.sql.Date; import java.text.SimpleDateFormat; -import java.util.TimeZone; public class CastDateToString extends LongToStringUnaryUDF { private static final long serialVersionUID = 1L; @@ -34,13 +34,13 @@ public CastDateToString() { super(); formatter = new SimpleDateFormat("yyyy-MM-dd"); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + formatter.setCalendar(DateTimeMath.getProlepticGregorianCalendarUTC()); } public CastDateToString(int inputColumn, int outputColumnNum) { super(inputColumn, outputColumnNum); formatter = new SimpleDateFormat("yyyy-MM-dd"); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + formatter.setCalendar(DateTimeMath.getProlepticGregorianCalendarUTC()); } // The assign method will be overridden for CHAR and VARCHAR. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java index 837de9d0cadf8496030134937ad91ef2c9ef9deb..ac6519b625711fc115bceb855fc6de6cc44f17bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java @@ -20,13 +20,13 @@ import java.util.Arrays; import java.util.Calendar; -import java.util.TimeZone; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -34,7 +34,6 @@ import com.google.common.base.Preconditions; - /** * Abstract class to return various fields from a Timestamp or Date. 
*/ @@ -44,8 +43,7 @@ protected final int colNum; protected final int field; - protected transient final Calendar calendar = Calendar.getInstance( - TimeZone.getTimeZone("UTC")); + protected final transient Calendar calendar = DateTimeMath.getProlepticGregorianCalendarUTC(); public VectorUDFTimestampFieldDate(int field, int colNum, int outputColumnNum) { super(outputColumnNum); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java index 9acfa86104ab65a7b109387b1aebab2750fa456d..9ef90947b235c2f22a63cb1a985fa915efbb5dc3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -23,10 +23,10 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import java.text.ParseException; import java.util.Calendar; -import java.util.TimeZone; /** * Abstract class to return various fields from a String. @@ -40,8 +40,7 @@ protected final int fieldLength; private static final String patternMin = "0000-00-00 00:00:00.000000000"; private static final String patternMax = "9999-19-99 29:59:59.999999999"; - protected transient final Calendar calendar = Calendar.getInstance( - TimeZone.getTimeZone("UTC")); + protected final transient Calendar calendar = DateTimeMath.getProlepticGregorianCalendarUTC(); public VectorUDFTimestampFieldString(int colNum, int outputColumnNum, int fieldStart, int fieldLength) { super(outputColumnNum); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java index 94e8b474873958d901336c4f79b37e7fdae357cd..142d05f86a9e8ddf8eb0a73647b733e94ed9e716 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java @@ -20,7 +20,6 @@ import java.util.Arrays; import java.util.Calendar; -import java.util.TimeZone; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -28,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hive.common.util.DateUtils; @@ -44,8 +44,7 @@ protected final int colNum; protected final int field; - protected transient final Calendar calendar = Calendar.getInstance( - TimeZone.getTimeZone("UTC")); + protected final transient Calendar calendar = DateTimeMath.getProlepticGregorianCalendarUTC(); public VectorUDFTimestampFieldTimestamp(int field, int colNum, int outputColumnNum) { super(outputColumnNum); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java index 72fa2630b62711e2ade2b30a0b61b1afcbbb49c7..cbb8fd09c367d930f3a041caeff67be16305175a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.NDV; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -37,7 +38,6 @@ import org.apache.hadoop.io.IntWritable; import java.util.Calendar; -import java.util.TimeZone; /** * UDFDayOfMonth. @@ -60,7 +60,7 @@ private transient PrimitiveObjectInspector.PrimitiveCategory[] inputTypes = new PrimitiveObjectInspector.PrimitiveCategory[1]; private final IntWritable output = new IntWritable(); - private final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + private final Calendar calendar = DateTimeMath.getProlepticGregorianCalendarUTC(); @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java index 1a8b2da7a9700ac992703fb27f62d4623eb73226..e55dbc4633b7af49fc28768450e9c442c4f37e89 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.NDV; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -37,7 +38,6 @@ import org.apache.hadoop.io.IntWritable; import java.util.Calendar; -import java.util.TimeZone; /** * UDFMonth. 
@@ -60,7 +60,7 @@ private transient PrimitiveObjectInspector.PrimitiveCategory[] inputTypes = new PrimitiveObjectInspector.PrimitiveCategory[1]; private final IntWritable output = new IntWritable(); - private final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + private final Calendar calendar = DateTimeMath.getProlepticGregorianCalendarUTC(); @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java index 18ca9a7e9bf84af22674b3ea7b31d4cde0ad9a76..6d64da115c21aa920f20c5932c40d76f0dc74c88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.udf; import java.util.Calendar; -import java.util.TimeZone; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; @@ -30,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearString; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearTimestamp; import org.apache.hadoop.hive.ql.udf.generic.NDV; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.io.IntWritable; @@ -39,7 +39,7 @@ * UDFWeekOfYear. * */ -@Description(name = "yearweek", +@Description(name = "weekofyear", value = "_FUNC_(date) - Returns the week of the year of the given date. A week " + "is considered to start on a Monday and week 1 is the first week with >3 days.", extended = "Examples:\n" @@ -52,7 +52,7 @@ private final IntWritable result = new IntWritable(); - private final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + private final Calendar calendar = DateTimeMath.getProlepticGregorianCalendarUTC(); public UDFWeekOfYear() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java index fcbb57f83874ced64de08cb983cf6c00d2bb3708..8ac1dc5b9dde153b0c22770a158d08f74bd65eb5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.NDV; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -37,7 +38,6 @@ import org.apache.hadoop.io.IntWritable; import java.util.Calendar; -import java.util.TimeZone; /** * UDFYear. 
@@ -60,7 +60,7 @@ private transient PrimitiveObjectInspector.PrimitiveCategory[] inputTypes = new PrimitiveObjectInspector.PrimitiveCategory[1]; private final IntWritable output = new IntWritable(); - private final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + private final Calendar calendar = DateTimeMath.getProlepticGregorianCalendarUTC(); @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java index 6df0913de6c8ece7cef360b1213706979283ae5e..7ca8dad5aaab1f1757e9ea2fc22afe447eb4096f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java @@ -24,7 +24,6 @@ import java.text.SimpleDateFormat; import java.util.Calendar; -import java.util.TimeZone; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; @@ -32,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; @@ -64,7 +64,7 @@ private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[3]; private final Text output = new Text(); private transient SimpleDateFormat formatter = null; - private final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + private final Calendar calendar = DateTimeMath.getProlepticGregorianCalendarUTC(); private transient Integer numMonthsConst; private transient boolean isNumMonthsConst; @@ -82,7 +82,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen String fmtStr = getConstantStringValue(arguments, 2); if (fmtStr != null) { formatter = new SimpleDateFormat(fmtStr); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + formatter.setCalendar(calendar); } } else { throw new UDFArgumentTypeException(2, getFuncName() + " only takes constant as " @@ -92,6 +92,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen if (formatter == null) { //If the DateFormat is not provided by the user or is invalid, use the default format YYYY-MM-dd formatter = DateUtils.getDateFormat(); + formatter.setCalendar(calendar); } // the function should support both short date and full timestamp format diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java index 6d3e86f9212fd3c082390790b73667f74f925c3b..a30b9160def83f491f60cb66835df10c21041e17 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java @@ -21,7 +21,6 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; import java.text.SimpleDateFormat; -import java.util.TimeZone; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; @@ -29,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import 
org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; @@ -80,7 +80,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen if (fmtStr != null) { try { formatter = new SimpleDateFormat(fmtStr); - formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + formatter.setCalendar(DateTimeMath.getProlepticGregorianCalendarUTC()); } catch (IllegalArgumentException e) { // ignore } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java index e0db417dab31db951cbe0425107007f9273b1e5a..ad128d800d41fef7475eb5924803ab9fbe11be11 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java @@ -29,13 +29,13 @@ import java.math.BigDecimal; import java.util.Calendar; -import java.util.TimeZone; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -66,8 +66,8 @@ private transient PrimitiveCategory[] tsInputTypes = new PrimitiveCategory[2]; private transient Converter[] dtConverters = new Converter[2]; private transient PrimitiveCategory[] dtInputTypes = new PrimitiveCategory[2]; - private final Calendar cal1 = Calendar.getInstance(TimeZone.getTimeZone("UTC")); - private final Calendar cal2 = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + private final Calendar cal1 = DateTimeMath.getProlepticGregorianCalendarUTC(); + private final Calendar cal2 = DateTimeMath.getProlepticGregorianCalendarUTC(); private final DoubleWritable output = new DoubleWritable(); private boolean isRoundOffNeeded = true; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java b/ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java index 20acfa253c3c1a32f82499e85c240633a5fb1d2f..53395b6c7e9d2d3f19509caf2d194d970da159ae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java @@ -25,6 +25,7 @@ import org.apache.hive.common.util.DateUtils; import java.util.Calendar; +import java.util.GregorianCalendar; import java.util.TimeZone; import java.util.concurrent.TimeUnit; @@ -600,4 +601,16 @@ public boolean subtract(java.sql.Timestamp left, java.sql.Timestamp right, result.set(totalSeconds, nanosResult.nanos); return true; } + + /** + * TODO - this is a temporary fix for handling Julian calendar dates. + * Returns a Gregorian calendar that can be used from year 0+ instead of default 1582.10.15. + * This is desirable for some UDFs that work on dates which normally would use Julian calendar. 
+ * @return the calendar + */ + public static Calendar getProlepticGregorianCalendarUTC() { + GregorianCalendar calendar = new GregorianCalendar(TimeZone.getTimeZone("UTC".intern())); + calendar.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); + return calendar; + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java index a449ea143da0a02a3528525acf2919eb8c1554b6..ea4aeaef78e9a7153490ab6bc0c7e7e6f0098ab9 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java @@ -25,6 +25,7 @@ import java.math.BigDecimal; import java.nio.charset.StandardCharsets; import java.sql.Timestamp; +import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Random; import java.util.concurrent.TimeUnit; @@ -33,6 +34,7 @@ import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.util.DateTimeMath; import org.apache.hadoop.hive.serde2.RandomTypeUtil; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; @@ -74,9 +76,6 @@ public void testVectorCastDoubleToLong() throws HiveException { Assert.assertEquals(1, resultV.vector[6]); } - // +8 hours from PST to GMT, needed because java.sql.Date will subtract 8 hours from final - // value because VM in test time zone is PST. - private static final long TIME_DIFFERENCE = 28800000L; @Test public void testCastDateToString() throws HiveException { int[] intValues = new int[100]; @@ -86,10 +85,12 @@ public void testCastDateToString() throws HiveException { VectorExpression expr = new CastDateToString(0, 1); expr.evaluate(b); + SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + formatter.setCalendar(DateTimeMath.getProlepticGregorianCalendarUTC()); + String expected, result; for (int i = 0; i < intValues.length; i++) { - expected = - new java.sql.Date(DateWritableV2.daysToMillis(intValues[i]) + TIME_DIFFERENCE).toString(); + expected = formatter.format(new java.sql.Date(DateWritableV2.daysToMillis(intValues[i]))); byte[] subbyte = Arrays.copyOfRange(resultV.vector[i], resultV.start[i], resultV.start[i] + resultV.length[i]); result = new String(subbyte, StandardCharsets.UTF_8); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java index eb104bdb0ca84e010fb868c8cd708c2550e12504..1b5fd42491f10eeb7419724fce3dd7fc77ea3f8a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFAddMonths.java @@ -66,6 +66,8 @@ public void testAddMonthsInt() throws HiveException { runAndVerify("2016-02-29", -12, "2015-02-28", udf); runAndVerify("2016-01-29", 1, "2016-02-29", udf); runAndVerify("2016-02-29", -1, "2016-01-31", udf); + runAndVerify("1001-10-05", 1, "1001-11-05", udf); + runAndVerify("1582-10-05", 1, "1582-11-05", udf); // ts str runAndVerify("2014-01-14 10:30:00", 1, "2014-02-14", udf); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java index 
f0a5d3f19ef5b1f7b6aaa3eba662eb8c3d73a78f..246fbd380c4ece0636101a9e92864209ddcf2881 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormat.java @@ -45,7 +45,7 @@ public void testDateFormatStr() throws HiveException { Text fmtText = new Text("EEEE"); ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); - ObjectInspector[] arguments = { valueOI0, valueOI1 }; + ObjectInspector[] arguments = {valueOI0, valueOI1}; udf.initialize(arguments); @@ -157,6 +157,19 @@ public void testWrongFmt() throws HiveException { runAndVerifyStr("2015-04-05", fmtText, null, udf); } + + @Test + public void testJulianDates() throws HiveException { + GenericUDFDateFormat udf = new GenericUDFDateFormat(); + ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + Text fmtText = new Text("dd---MM--yyyy"); + ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory + .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, fmtText); + ObjectInspector[] arguments = { valueOI0, valueOI1 }; + udf.initialize(arguments); + runAndVerifyStr("1001-01-05", fmtText, "05---01--1001", udf); + } + private void runAndVerifyStr(String str, Text fmtText, String expResult, GenericUDF udf) throws HiveException { DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new Text(str) : null); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java index 707e84111f0177360e0b26ab7f2c2fb0cd298694..e386cfa28f6843fb88e5c273e9d7a2f511307496 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java @@ -131,6 +131,9 @@ public void testMonthsBetweenForString(GenericUDFMonthsBetween udf) throws HiveE runTestStr(null, null, null, udf); runTestStr("2003-04-23", "2002-04-24", 11.96774194, udf); + + //Test for Julian vs Gregorian dates + runTestStr("1582-10-05", "1582-11-05", -1., udf); } diff --git a/ql/src/test/queries/clientpositive/udf_day.q b/ql/src/test/queries/clientpositive/udf_day.q index fb9b57da015bb172e9f26035adabf53f4a6952f4..4b2ad426bf1ea5553c0f14d6c6cf1c17b0bf9f2c 100644 --- a/ql/src/test/queries/clientpositive/udf_day.q +++ b/ql/src/test/queries/clientpositive/udf_day.q @@ -1,2 +1,5 @@ DESCRIBE FUNCTION day; DESCRIBE FUNCTION EXTENDED day; + +select month('2005-01-02'); +select month('1001-01-05'); diff --git a/ql/src/test/queries/clientpositive/udf_month.q b/ql/src/test/queries/clientpositive/udf_month.q index 0a582ba37e312be1f64959e21c2567cd9fe3e8d9..2b4363e6d4595696768a96ead41aafd7f45dd562 100644 --- a/ql/src/test/queries/clientpositive/udf_month.q +++ b/ql/src/test/queries/clientpositive/udf_month.q @@ -1,2 +1,5 @@ -DESCRIBE FUNCTION minute; -DESCRIBE FUNCTION EXTENDED minute; +DESCRIBE FUNCTION month; +DESCRIBE FUNCTION EXTENDED month; + +select month('2005-01-02'); +select month('1001-01-05'); diff --git a/ql/src/test/queries/clientpositive/udf_weekofyear.q b/ql/src/test/queries/clientpositive/udf_weekofyear.q index 18ab8280054c825b1d31dc01f47aa38be9c6231a..0122e685fb7608b9d7f4657f4e738b8b24be4f92 100644 --- a/ql/src/test/queries/clientpositive/udf_weekofyear.q +++ 
b/ql/src/test/queries/clientpositive/udf_weekofyear.q @@ -7,3 +7,7 @@ DESCRIBE FUNCTION EXTENDED weekofyear; SELECT weekofyear('1980-01-01'), weekofyear('1980-01-06'), weekofyear('1980-01-07'), weekofyear('1980-12-31'), weekofyear('1984-1-1'), weekofyear('2008-02-20 00:00:00'), weekofyear('1980-12-28 23:59:59'), weekofyear('1980-12-29 23:59:59') FROM src tablesample (1 rows); + +SELECT weekofyear('1007-01-01 10:13:14'); +SELECT weekofyear('1008-01-01 10:13:14'); +SELECT weekofyear('1009-01-01 10:13:14'); diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index 38b053703bf217234b8e792e43d9b761e9fff39b..fbbf094bcfc5d4ec8736b3bc282a478cbf630f2d 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -415,7 +415,7 @@ POSTHOOK: Input: default@alltypesorc_string -29 1969 12 31 31 1 23 59 30 NULL 1969-12-31 23:59:30.929 1969-12-31 15:59:55.451 1319-02-02 16:31:57.778 1969-12-31 23:59:30.929 1969-12-31 15:59:55.451 NULL 1969-12-31 15:59:55.451 -29 1969 12 31 31 1 23 59 30 NULL 1969-12-31 23:59:30.929 1969-12-31 15:59:58.174 1319-02-02 16:31:57.778 1969-12-31 23:59:30.929 1969-12-31 15:59:58.174 NULL 1969-12-31 15:59:58.174 -29 1969 12 31 31 1 23 59 30 NULL 1969-12-31 23:59:30.929 1969-12-31 15:59:58.456 1319-02-02 16:31:57.778 1969-12-31 23:59:30.929 1969-12-31 15:59:58.456 NULL 1969-12-31 15:59:58.456 --45479202281 528 10 25 25 43 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL +-45479202281 528 10 27 27 44 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL 1632453512 2021 9 24 24 38 3 18 32 NULL 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 NULL 1974-10-04 17:21:03.989 1632453512 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 NULL 1999-10-03 16:59:10.396903939 1632453512 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 NULL 2010-04-08 02:43:35.861742727 @@ -614,7 +614,7 @@ POSTHOOK: Input: default@alltypesorc_string -30 1969 12 31 31 1 23 59 30 -30 1969 12 31 31 1 23 59 30 -30 1969 12 31 31 1 23 59 30 --62018199211 4 9 24 22 39 18 26 29 +-62018199211 4 9 22 22 39 18 26 29 1365554626 2013 4 10 10 15 0 43 46 206730996125 8521 1 16 16 3 20 42 5 271176065 1978 8 5 5 31 14 41 5 @@ -969,7 +969,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_wrong #### A masked pattern was here #### -NULL 2 12 2 NULL 49 4 40 39 +NULL 2 11 30 NULL 48 4 40 39 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT diff --git a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index 5671a7c0a19472e6918657f7e89c2395f5251a17..b58d0084a235a7e650d391f66b60258098d37a8f 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ 
b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -413,7 +413,7 @@ POSTHOOK: Input: default@alltypesorc_string -29 1969 12 31 31 1 23 59 30 NULL 1969-12-31 23:59:30.929 1969-12-31 15:59:55.451 1319-02-02 16:31:57.778 1969-12-31 23:59:30.929 1969-12-31 15:59:55.451 NULL 1969-12-31 15:59:55.451 -29 1969 12 31 31 1 23 59 30 NULL 1969-12-31 23:59:30.929 1969-12-31 15:59:58.174 1319-02-02 16:31:57.778 1969-12-31 23:59:30.929 1969-12-31 15:59:58.174 NULL 1969-12-31 15:59:58.174 -29 1969 12 31 31 1 23 59 30 NULL 1969-12-31 23:59:30.929 1969-12-31 15:59:58.456 1319-02-02 16:31:57.778 1969-12-31 23:59:30.929 1969-12-31 15:59:58.456 NULL 1969-12-31 15:59:58.456 --45479202281 528 10 25 25 43 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL +-45479202281 528 10 27 27 44 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL 1632453512 2021 9 24 24 38 3 18 32 NULL 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 NULL 1974-10-04 17:21:03.989 1632453512 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 NULL 1999-10-03 16:59:10.396903939 1632453512 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 NULL 2010-04-08 02:43:35.861742727 @@ -610,7 +610,7 @@ POSTHOOK: Input: default@alltypesorc_string -30 1969 12 31 31 1 23 59 30 -30 1969 12 31 31 1 23 59 30 -30 1969 12 31 31 1 23 59 30 --62018199211 4 9 24 22 39 18 26 29 +-62018199211 4 9 22 22 39 18 26 29 1365554626 2013 4 10 10 15 0 43 46 206730996125 8521 1 16 16 3 20 42 5 271176065 1978 8 5 5 31 14 41 5 @@ -961,7 +961,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_wrong #### A masked pattern was here #### -NULL 2 12 2 NULL 49 4 40 39 +NULL 2 11 30 NULL 48 4 40 39 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT diff --git a/ql/src/test/results/clientpositive/udf_day.q.out b/ql/src/test/results/clientpositive/udf_day.q.out index 48e816123e5b07d817a8ee008f701a1e4c7cf013..8e12825caee19e388ba3dbfdd4813ed0b2bc4c85 100644 --- a/ql/src/test/results/clientpositive/udf_day.q.out +++ b/ql/src/test/results/clientpositive/udf_day.q.out @@ -18,3 +18,21 @@ param can be one of: 30 Function class:org.apache.hadoop.hive.ql.udf.UDFDayOfMonth Function type:BUILTIN +PREHOOK: query: select month('2005-01-02') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select month('2005-01-02') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1 +PREHOOK: query: select month('1001-01-05') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select month('1001-01-05') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1 diff --git a/ql/src/test/results/clientpositive/udf_month.q.out b/ql/src/test/results/clientpositive/udf_month.q.out index 
c07c98fafb9eb0687791ce673f3a8e32425a1452..068f2ff2f4b260980e875383214e83b1f86074a2 100644 --- a/ql/src/test/results/clientpositive/udf_month.q.out +++ b/ql/src/test/results/clientpositive/udf_month.q.out @@ -1,20 +1,37 @@ -PREHOOK: query: DESCRIBE FUNCTION minute +PREHOOK: query: DESCRIBE FUNCTION month PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION minute +POSTHOOK: query: DESCRIBE FUNCTION month POSTHOOK: type: DESCFUNCTION -minute(param) - Returns the minute component of the string/timestamp/interval -PREHOOK: query: DESCRIBE FUNCTION EXTENDED minute +month(param) - Returns the month component of the date/timestamp/interval +PREHOOK: query: DESCRIBE FUNCTION EXTENDED month PREHOOK: type: DESCFUNCTION -POSTHOOK: query: DESCRIBE FUNCTION EXTENDED minute +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED month POSTHOOK: type: DESCFUNCTION -minute(param) - Returns the minute component of the string/timestamp/interval +month(param) - Returns the month component of the date/timestamp/interval param can be one of: -1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'HH:mm:ss'. -2. A timestamp value -3. A day-time interval valueExample: - > SELECT minute('2009-07-30 12:58:59') FROM src LIMIT 1; - 58 - > SELECT minute('12:58:59') FROM src LIMIT 1; - 58 -Function class:org.apache.hadoop.hive.ql.udf.UDFMinute +1. A string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'. +2. A date value +3. A timestamp value +4. A year-month interval valueExample: + > SELECT month('2009-07-30') FROM src LIMIT 1; + 7 +Function class:org.apache.hadoop.hive.ql.udf.UDFMonth Function type:BUILTIN +PREHOOK: query: select month('2005-01-02') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select month('2005-01-02') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1 +PREHOOK: query: select month('1001-01-05') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select month('1001-01-05') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1 diff --git a/ql/src/test/results/clientpositive/udf_weekofyear.q.out b/ql/src/test/results/clientpositive/udf_weekofyear.q.out index 6e6c661dbf6c0313f7a59a2804fa7cc789e67b08..f003af05c7e4ebe69aa89852dcc8f6f74b6a2496 100644 --- a/ql/src/test/results/clientpositive/udf_weekofyear.q.out +++ b/ql/src/test/results/clientpositive/udf_weekofyear.q.out @@ -28,3 +28,30 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 1 1 2 1 52 8 52 1 +PREHOOK: query: SELECT weekofyear('1007-01-01 10:13:14') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT weekofyear('1007-01-01 10:13:14') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1 +PREHOOK: query: SELECT weekofyear('1008-01-01 10:13:14') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT weekofyear('1008-01-01 10:13:14') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +53 +PREHOOK: query: SELECT weekofyear('1009-01-01 10:13:14') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT 
weekofyear('1009-01-01 10:13:14') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +52 diff --git a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 9a42dc0fecad6987e0b71a2b8673df1f475972f5..81905997d416d11cf3ecb4c89cef7640e74f465c 100644 --- a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -398,7 +398,7 @@ POSTHOOK: Input: default@alltypesorc_string -29 1969 12 31 31 1 23 59 30 NULL 1969-12-31 23:59:30.929 1969-12-31 15:59:55.451 1319-02-02 16:31:57.778 1969-12-31 23:59:30.929 1969-12-31 15:59:55.451 NULL 1969-12-31 15:59:55.451 -29 1969 12 31 31 1 23 59 30 NULL 1969-12-31 23:59:30.929 1969-12-31 15:59:58.174 1319-02-02 16:31:57.778 1969-12-31 23:59:30.929 1969-12-31 15:59:58.174 NULL 1969-12-31 15:59:58.174 -29 1969 12 31 31 1 23 59 30 NULL 1969-12-31 23:59:30.929 1969-12-31 15:59:58.456 1319-02-02 16:31:57.778 1969-12-31 23:59:30.929 1969-12-31 15:59:58.456 NULL 1969-12-31 15:59:58.456 --45479202281 528 10 25 25 43 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL +-45479202281 528 10 27 27 44 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL 1632453512 2021 9 24 24 38 3 18 32 NULL 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 NULL 1974-10-04 17:21:03.989 1632453512 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 NULL 1999-10-03 16:59:10.396903939 1632453512 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 NULL 2010-04-08 02:43:35.861742727 @@ -580,7 +580,7 @@ POSTHOOK: Input: default@alltypesorc_string -30 1969 12 31 31 1 23 59 30 -30 1969 12 31 31 1 23 59 30 -30 1969 12 31 31 1 23 59 30 --62018199211 4 9 24 22 39 18 26 29 +-62018199211 4 9 22 22 39 18 26 29 1365554626 2013 4 10 10 15 0 43 46 206730996125 8521 1 16 16 3 20 42 5 271176065 1978 8 5 5 31 14 41 5 @@ -901,7 +901,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_wrong #### A masked pattern was here #### -NULL 2 12 2 NULL 49 4 40 39 +NULL 2 11 30 NULL 48 4 40 39 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
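Note for reviewers: the common thread in this patch is swapping plain UTC Calendar instances for the proleptic Gregorian calendar returned by the new DateTimeMath.getProlepticGregorianCalendarUTC() helper, and attaching that calendar to SimpleDateFormat via setCalendar() rather than only calling setTimeZone(). Below is a self-contained sketch of that technique using plain JDK classes only (it is not Hive code; the demo class, its main method and the sample dates are illustrative assumptions), showing the behavioral difference the updated tests and q.out files above rely on.

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.TimeZone;

public class ProlepticGregorianDemo {

    // Mirrors the idea of the helper added to DateTimeMath in this patch:
    // a UTC Gregorian calendar whose Julian->Gregorian cutover is pushed to
    // the beginning of time, so every date is interpreted on the proleptic
    // Gregorian calendar instead of the default hybrid calendar.
    static GregorianCalendar prolepticGregorianCalendarUTC() {
        GregorianCalendar calendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
        calendar.setGregorianChange(new Date(Long.MIN_VALUE));
        return calendar;
    }

    public static void main(String[] args) throws ParseException {
        // Default SimpleDateFormat uses the hybrid Julian/Gregorian calendar
        // with a cutover at 1582-10-15: the days 1582-10-05..1582-10-14 do
        // not exist, and lenient parsing silently rolls them forward.
        SimpleDateFormat hybrid = new SimpleDateFormat("yyyy-MM-dd");
        hybrid.setTimeZone(TimeZone.getTimeZone("UTC"));

        // Same pattern, but driven by the proleptic Gregorian calendar.
        SimpleDateFormat proleptic = new SimpleDateFormat("yyyy-MM-dd");
        proleptic.setCalendar(prolepticGregorianCalendarUTC());

        Date gap = hybrid.parse("1582-10-05");
        System.out.println(hybrid.format(gap));      // 1582-10-15 (rolled past the cutover gap)

        Date noGap = proleptic.parse("1582-10-05");
        System.out.println(proleptic.format(noGap)); // 1582-10-05 (the day exists proleptically)

        // Pre-1582 dates are labeled differently by the two calendars:
        // the hybrid calendar falls back to Julian reckoning for the same instant.
        Date old = proleptic.parse("1001-01-05");
        System.out.println(hybrid.format(old));      // prints a nearby but different day (Julian labeling)
        System.out.println(proleptic.format(old));   // 1001-01-05
    }
}

Under the default hybrid calendar the ten days 1582-10-05 through 1582-10-14 do not exist and earlier dates are labeled on the Julian calendar, which is why results such as months_between('1582-10-05', '1582-11-05') and the day/month/weekofyear values for year-1001 dates change in the expected outputs updated by this patch.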