diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDateToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDateToDate.java new file mode 100644 index 0000000000..d0e68aed95 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDateToDate.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +import java.util.Arrays; + +/** + * This is a superclass for unary functions and expressions taking a single date and returning + * a date, that operate directly on the input and set the output. 
+ */ +public abstract class FuncDateToDate extends VectorExpression { + + private static final long serialVersionUID = 1L; + private final int inputColumn; + + public FuncDateToDate(int inputColumn, int outputColumnNum) { + super(outputColumnNum); + this.inputColumn = inputColumn; + } + + public FuncDateToDate() { + super(); + + // Dummy final assignments. + inputColumn = -1; + } + + protected abstract void func(LongColumnVector outputColVector, LongColumnVector inputColVector, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector inputColVector = (LongColumnVector) batch.cols[inputColumn]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + + int[] sel = batch.selected; + int n = batch.size; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + + if (n == 0) { + + // Nothing to do + return; + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + func(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, inputColVector, i); + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + func(outputColVector, inputColVector, i); + } + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = inputColVector.isNull[i]; + if (!inputColVector.isNull[i]) { + func(outputColVector, inputColVector, i); + } + } + } else { + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inputColVector.isNull[i]) { + func(outputColVector, inputColVector, i); + } + } + } + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, inputColumn); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1) + .setArgumentTypes(getInputColumnType()) + .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } + + protected VectorExpressionDescriptor.ArgumentType getInputColumnType() { + return VectorExpressionDescriptor.ArgumentType.DATE; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToTimestamp.java new file mode 100644 index 0000000000..f558c57112 --- /dev/null +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToTimestamp.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * This is a superclass for unary functions and expressions taking a single timestamp and returning + * a timestamp, that operate directly on the input and set the output. + */ +public abstract class FuncTimestampToTimestamp extends VectorExpression { + + private static final long serialVersionUID = 1L; + private final int inputColumn; + + public FuncTimestampToTimestamp(int inputColumn, int outputColumnNum) { + super(outputColumnNum); + this.inputColumn = inputColumn; + } + + public FuncTimestampToTimestamp() { + super(); + + // Dummy final assignments. 
+ inputColumn = -1; + } + + abstract protected void func(TimestampColumnVector outputColVector, TimestampColumnVector inputColVector, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; + + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + + if (n == 0) { + + // Nothing to do + return; + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + func(outputColVector, inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + func(outputColVector, inputColVector, i); + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + func(outputColVector, inputColVector, i); + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. 
+ Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + func(outputColVector, inputColVector, i); + } + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = inputColVector.isNull[i]; + if (!inputColVector.isNull[i]) { + func(outputColVector, inputColVector, i); + } + } + } else { + System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inputColVector.isNull[i]) { + func(outputColVector, inputColVector, i); + } + } + } + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, inputColumn); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION).setNumArguments(1) + .setArgumentTypes(getInputColumnType()) + .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } + + protected VectorExpressionDescriptor.ArgumentType getInputColumnType() { + return VectorExpressionDescriptor.ArgumentType.TIMESTAMP; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarDate.java new file mode 100644 index 0000000000..9e6384263b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarDate.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + +import java.text.SimpleDateFormat; +import java.util.TimeZone; + +/** + * Vectorized version of GenericUDFDatetimeLegacyHybridCalendar (datetime_legacy_hybrid_calendar). + * Converts a date/timestamp to legacy hybrid Julian-Gregorian calendar assuming that its internal + * days/milliseconds since epoch is calculated using the proleptic Gregorian calendar. 
+ * Extends {@link FuncDateToDate} + */ + +public class VectorUDFDatetimeLegacyHybridCalendarDate extends FuncDateToDate { + private static final long serialVersionUID = 1L; + + // SimpleDateFormat doesn't serialize well; it's also not thread-safe + private static final ThreadLocal SIMPLE_DATE_FORMAT_THREAD_LOCAL = + ThreadLocal.withInitial(() -> { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + formatter.setLenient(false); + return formatter; + }); + + public VectorUDFDatetimeLegacyHybridCalendarDate() { + super(); + } + + public VectorUDFDatetimeLegacyHybridCalendarDate(int inputColumn, int outputColumnNum) { + super(inputColumn, outputColumnNum); + } + + protected void func(LongColumnVector outputColVector, LongColumnVector inputColVector, int i) { + // get number of milliseconds from number of days + Date inputDate = Date.ofEpochDay((int) inputColVector.vector[i]); + java.sql.Date oldDate = new java.sql.Date(inputDate.toEpochMilli()); + Date adjustedDate = Date.valueOf(SIMPLE_DATE_FORMAT_THREAD_LOCAL.get().format(oldDate)); + outputColVector.vector[i] = adjustedDate.toEpochDay(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarTimestamp.java new file mode 100644 index 0000000000..0bb93fff18 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDatetimeLegacyHybridCalendarTimestamp.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import java.text.SimpleDateFormat; +import java.util.TimeZone; + +/** + * Vectorized version of GenericUDFDatetimeLegacyHybridCalendar (datetime_legacy_hybrid_calendar). + * Converts a date/timestamp to legacy hybrid Julian-Gregorian calendar assuming that its internal + * days/milliseconds since epoch is calculated using the proleptic Gregorian calendar. 
+ * Extends {@link FuncTimestampToTimestamp} + */ + +public class VectorUDFDatetimeLegacyHybridCalendarTimestamp extends FuncTimestampToTimestamp { + private static final long serialVersionUID = 1L; + + // SimpleDateFormat doesn't serialize well; it's also not thread-safe + private static final ThreadLocal SIMPLE_DATE_FORMAT_THREAD_LOCAL = + ThreadLocal.withInitial(() -> { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + formatter.setLenient(false); + return formatter; + }); + + public VectorUDFDatetimeLegacyHybridCalendarTimestamp() { + super(); + } + + public VectorUDFDatetimeLegacyHybridCalendarTimestamp(int inputColumn, int outputColumnNum) { + super(inputColumn, outputColumnNum); + } + + protected void func(TimestampColumnVector outputColVector, TimestampColumnVector inputColVector, + int i) { + String adjustedTimestampString = SIMPLE_DATE_FORMAT_THREAD_LOCAL.get() + .format(new java.sql.Timestamp(inputColVector.time[i])); + Timestamp adjustedTimestamp = Timestamp.valueOf(adjustedTimestampString); + outputColVector.time[i] = adjustedTimestamp.toEpochMilli(); + // Nanos don't change + outputColVector.nanos[i] = inputColVector.nanos[i]; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java index 4a94b44af3..ddb019ad3c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDatetimeLegacyHybridCalendar.java @@ -24,6 +24,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDatetimeLegacyHybridCalendarDate; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDatetimeLegacyHybridCalendarTimestamp; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; @@ -36,16 +39,18 @@ /** - * GenericUDFToProlepticGregorian. + * GenericUDFDatetimeLegacyHybridCalendar. */ @Description(name = "datetime_legacy_hybrid_calendar", - value = "_FUNC_(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar \n" - + "assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar.", - extended = "Converts a date/timestamp to new proleptic Gregorian calendar (ISO 8601 standard), which is produced \n" - + "by extending the Gregorian calendar backward to dates preceding its official introduction in 1582, assuming \n" - + "that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar, \n" - + "i.e., calendar that supports both the Julian and Gregorian calendar systems with the support of a single \n" - + "discontinuity, which corresponds by default to the Gregorian date when the Gregorian calendar was instituted.") + value = "_FUNC_(date/timestamp) - Converts a date/timestamp to legacy hybrid Julian-Gregorian calendar \n" + + "assuming that its internal days/milliseconds since epoch is calculated using the proleptic Gregorian calendar.", + extended = "Converts a date/timestamp to legacy Gregorian-Julian hybrid calendar, i.e., calendar that supports both\n" + + "the Julian and Gregorian calendar systems with the support of a single discontinuity, which corresponds by\n" + + "default to the Gregorian date when the Gregorian calendar was instituted; assuming that its internal\n" + + "days/milliseconds since epoch is calculated using new proleptic Gregorian 
calendar (ISO 8601 standard), which\n" + + "is produced by extending the Gregorian calendar backward to dates preceding its official introduction in 1582.\n") +@VectorizedExpressions({VectorUDFDatetimeLegacyHybridCalendarTimestamp.class, + VectorUDFDatetimeLegacyHybridCalendarDate.class }) public class GenericUDFDatetimeLegacyHybridCalendar extends GenericUDF { private transient PrimitiveObjectInspector inputOI; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFDatetimeLegacyHybridCalendar.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFDatetimeLegacyHybridCalendar.java new file mode 100644 index 0000000000..1ce710e74a --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFDatetimeLegacyHybridCalendar.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDatetimeLegacyHybridCalendar; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests VectorUDFDatetimeLegacyHybridCalendarTimestamp and + * VectorUDFDatetimeLegacyHybridCalendarDate. 
+ */ +public class TestVectorUDFDatetimeLegacyHybridCalendar { + + @Test + public void testVectorUDFDatetimeLegacyHybridCalendarTimestamp() throws HiveException { + VectorizedRowBatch batch = getFreshBatchOfTimestamps(VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((TimestampColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((TimestampColumnVector) batch.cols[1]).isRepeating); + verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch); + + batch = getFreshBatchOfTimestamps(1); + batch.cols[0].isRepeating = true; // + verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch); + + batch = getFreshBatchOfTimestamps(3); + batch.cols[0].isRepeating = false; + batch.selectedInUse = true; + batch.selected = new int[] {0, 1, 2}; + verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(batch); + } + + private VectorizedRowBatch getFreshBatchOfTimestamps(int size) { + return getVectorizedRowBatch(new java.sql.Timestamp[] { + new java.sql.Timestamp(Timestamp.valueOf("0001-01-01 00:00:00").toEpochMilli()), + new java.sql.Timestamp(Timestamp.valueOf("1400-01-01 00:30:00.123456").toEpochMilli()), + new java.sql.Timestamp(Timestamp.valueOf("1500-01-01 00:30:00").toEpochMilli()), + new java.sql.Timestamp(Timestamp.valueOf("1583-01-01 00:30:00.123").toEpochMilli()), + }, + size); + } + + /** + * Input array is used to fill the entire specified size of the vector row batch + */ + private VectorizedRowBatch getVectorizedRowBatch(java.sql.Timestamp[] inputs, int size) { + VectorizedRowBatch batch = new VectorizedRowBatch(2, size); + TimestampColumnVector inputCol = new 
TimestampColumnVector(size); + for (int i = 0; i < size; i++) { + inputCol.set(i, inputs[i % inputs.length]); + } + batch.cols[0] = inputCol; + batch.cols[1] = new TimestampColumnVector(size); + batch.size = size; + return batch; + } + + private void verifyVectorUDFDatetimeLegacyHybridCalendarTimestamp(VectorizedRowBatch batch) + throws HiveException { + GenericUDF genUdf = new GenericUDFDatetimeLegacyHybridCalendar(); + genUdf.initialize(new ObjectInspector[]{ + PrimitiveObjectInspectorFactory.writableTimestampObjectInspector}); + + VectorExpression vecUdf = new VectorUDFDatetimeLegacyHybridCalendarTimestamp(0, 1); + vecUdf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + java.sql.Timestamp input = + ((TimestampColumnVector) batch.cols[in]).asScratchTimestamp(i); + java.sql.Timestamp result = + ((TimestampColumnVector) batch.cols[out]).asScratchTimestamp(i); + compareToUDFDatetimeLegacyHybridCalendar(genUdf, input, result); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + private void compareToUDFDatetimeLegacyHybridCalendar( + GenericUDF udf, java.sql.Timestamp in, java.sql.Timestamp out) throws HiveException { + TimestampWritableV2 tswInput = new TimestampWritableV2( + org.apache.hadoop.hive.common.type.Timestamp.ofEpochMilli(in.getTime(), in.getNanos())); + TimestampWritableV2 tswOutput = (TimestampWritableV2) udf + .evaluate(new GenericUDF.DeferredObject[] {new GenericUDF.DeferredJavaObject(tswInput)}); + Assert.assertEquals(tswOutput.getTimestamp(), Timestamp.ofEpochMilli(out.getTime())); + Assert.assertEquals(tswOutput.getNanos(), out.getNanos()); + } + + @Test + public void testVectorUDFDatetimeLegacyHybridCalendarDate() throws HiveException { + VectorizedRowBatch batch = getFreshBatchOfDates(VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + 
Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch); + + batch = getFreshBatchOfDates(1); + batch.cols[0].isRepeating = true; // + verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch); + + batch = getFreshBatchOfDates(3); + batch.cols[0].isRepeating = false; + batch.selectedInUse = true; + batch.selected = new int[] {0, 1, 2}; + verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyVectorUDFDatetimeLegacyHybridCalendarDate(batch); + } + + private VectorizedRowBatch getFreshBatchOfDates(int size) { + return getVectorizedRowBatch(new Long[] { + -719162L, //Monday, 01 January 0001 + -208188L, //Monday, 01 January 1400 + -171664L, //Monday, 01 January 1500 + -141349L, //Monday, 01 January 1583 + }, + size); + } + + private VectorizedRowBatch getVectorizedRowBatch(Long[] inputs, int size) { + VectorizedRowBatch batch = new VectorizedRowBatch(2, size); + LongColumnVector inputCol = new LongColumnVector(size); + for (int i = 0; i < size; i++) { + inputCol.vector[i] = inputs[i % inputs.length]; + } + batch.cols[0] = inputCol; + batch.cols[1] = new LongColumnVector(size); + batch.size = size; + return batch; + } + + + private void verifyVectorUDFDatetimeLegacyHybridCalendarDate(VectorizedRowBatch batch) + throws HiveException { + GenericUDF genUdf = new GenericUDFDatetimeLegacyHybridCalendar(); + genUdf.initialize( + new ObjectInspector[] {PrimitiveObjectInspectorFactory.writableDateObjectInspector}); + + VectorExpression vecUdf = new VectorUDFDatetimeLegacyHybridCalendarDate(0, 1); + vecUdf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { 
+ if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + long input = ((LongColumnVector) batch.cols[in]).vector[i]; + long output = ((LongColumnVector) batch.cols[out]).vector[i]; + compareToUDFDatetimeLegacyHybridCalendar(genUdf, input, output); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + private void compareToUDFDatetimeLegacyHybridCalendar(GenericUDF udf, long in, long out) + throws HiveException { + DateWritableV2 dateWInput = new DateWritableV2((int) in); + DateWritableV2 dateWOutput = (DateWritableV2) udf + .evaluate(new GenericUDF.DeferredObject[] { + new GenericUDF.DeferredJavaObject(dateWInput)}); + Assert.assertEquals(dateWOutput.get(), Date.ofEpochDay((int) out)); + } +} diff --git ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q index ce58a343cd..e5c2ee5dbc 100644 --- ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q +++ ql/src/test/queries/clientpositive/udf_datetime_legacy_hybrid_calendar.q @@ -10,3 +10,29 @@ SELECT '0501-03-07 17:03:00.4321' AS tss, CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp; + +--newer timestamps shouldn't be changed +SELECT + '1600-03-07 17:03:00.4321' AS tss, + CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + datetime_legacy_hybrid_calendar(CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp; + + +--test vectorized UDF-- +set hive.fetch.task.conversion=none; + +SELECT + '0601-03-07' AS dts, + CAST('0601-03-07' AS DATE) AS dt, + datetime_legacy_hybrid_calendar(CAST('0601-03-07' AS DATE)) AS dtp; + +SELECT + '0501-03-07 17:03:00.4321' AS tss, + CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp; + +--newer timestamps shouldn't be changed +SELECT + '1600-03-07 
17:03:00.4321' AS tss, + CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + datetime_legacy_hybrid_calendar(CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp; diff --git ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out index 572c6c16a4..117e707d34 100644 --- ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out +++ ql/src/test/results/clientpositive/udf_datetime_legacy_hybrid_calendar.q.out @@ -2,19 +2,20 @@ PREHOOK: query: DESCRIBE FUNCTION datetime_legacy_hybrid_calendar PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION datetime_legacy_hybrid_calendar POSTHOOK: type: DESCFUNCTION -datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar -assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar. +datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to legacy hybrid Julian-Gregorian calendar +assuming that its internal days/milliseconds since epoch is calculated using the proleptic Gregorian calendar. PREHOOK: query: DESCRIBE FUNCTION EXTENDED datetime_legacy_hybrid_calendar PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED datetime_legacy_hybrid_calendar POSTHOOK: type: DESCFUNCTION -datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to new proleptic Gregorian calendar -assuming that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar. 
-Converts a date/timestamp to new proleptic Gregorian calendar (ISO 8601 standard), which is produced -by extending the Gregorian calendar backward to dates preceding its official introduction in 1582, assuming -that its internal days/milliseconds since epoch is calculated using legacy Gregorian-Julian hybrid calendar, -i.e., calendar that supports both the Julian and Gregorian calendar systems with the support of a single -discontinuity, which corresponds by default to the Gregorian date when the Gregorian calendar was instituted. +datetime_legacy_hybrid_calendar(date/timestamp) - Converts a date/timestamp to legacy hybrid Julian-Gregorian calendar +assuming that its internal days/milliseconds since epoch is calculated using the proleptic Gregorian calendar. +Converts a date/timestamp to legacy Gregorian-Julian hybrid calendar, i.e., calendar that supports both +the Julian and Gregorian calendar systems with the support of a single discontinuity, which corresponds by +default to the Gregorian date when the Gregorian calendar was instituted; assuming that its internal +days/milliseconds since epoch is calculated using new proleptic Gregorian calendar (ISO 8601 standard), which +is produced by extending the Gregorian calendar backward to dates preceding its official introduction in 1582. 
+ Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFDatetimeLegacyHybridCalendar Function type:BUILTIN PREHOOK: query: SELECT @@ -47,3 +48,63 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 0501-03-07 17:03:00.4321 0501-03-07 17:03:00.4321 0501-03-05 17:03:00.4321 +PREHOOK: query: SELECT + '1600-03-07 17:03:00.4321' AS tss, + CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + datetime_legacy_hybrid_calendar(CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '1600-03-07 17:03:00.4321' AS tss, + CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + datetime_legacy_hybrid_calendar(CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1600-03-07 17:03:00.4321 1600-03-07 17:03:00.4321 1600-03-07 17:03:00.4321 +PREHOOK: query: SELECT + '0601-03-07' AS dts, + CAST('0601-03-07' AS DATE) AS dt, + datetime_legacy_hybrid_calendar(CAST('0601-03-07' AS DATE)) AS dtp +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '0601-03-07' AS dts, + CAST('0601-03-07' AS DATE) AS dt, + datetime_legacy_hybrid_calendar(CAST('0601-03-07' AS DATE)) AS dtp +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +0601-03-07 0601-03-07 0601-03-04 +PREHOOK: query: SELECT + '0501-03-07 17:03:00.4321' AS tss, + CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '0501-03-07 17:03:00.4321' AS tss, + CAST('0501-03-07 17:03:00.4321' AS 
TIMESTAMP) AS ts, + datetime_legacy_hybrid_calendar(CAST('0501-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +0501-03-07 17:03:00.4321 0501-03-07 17:03:00.4321 0501-03-05 17:03:00.4321 +PREHOOK: query: SELECT + '1600-03-07 17:03:00.4321' AS tss, + CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + datetime_legacy_hybrid_calendar(CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '1600-03-07 17:03:00.4321' AS tss, + CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP) AS ts, + datetime_legacy_hybrid_calendar(CAST('1600-03-07 17:03:00.4321' AS TIMESTAMP)) AS tsp +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1600-03-07 17:03:00.4321 1600-03-07 17:03:00.4321 1600-03-07 17:03:00.4321