diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java
new file mode 100644
index 0000000..e03c23c
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+
+import java.util.Arrays;
+
+/**
+ * This class represents a nullable date column vector. Dates are stored as
+ * int values giving the number of days since 1970-01-01, the same
+ * representation that DateWritable uses.
+ */
+public class DateColumnVector extends ColumnVector {
+  public int[] vector;
+  private final DateWritable writableObj = new DateWritable();
+
+  // Placeholder written into the vector for null entries; consumers must
+  // consult isNull before reading a value.
+  public static final int NULL_VALUE = 1;
+
+  /**
+   * Use this constructor by default. All column vectors
+   * should normally be the default size.
+   */
+  public DateColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows
+   */
+  public DateColumnVector(int len) {
+    super(len);
+    vector = new int[len];
+  }
+
+  @Override
+  public Writable getWritableObject(int index) {
+    if (this.isRepeating) {
+      index = 0;
+    }
+    if (!noNulls && isNull[index]) {
+      return NullWritable.get();
+    } else {
+      writableObj.set(vector[index]);
+      return writableObj;
+    }
+  }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, LongColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.vector[0] = vector[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    }
+    else {
+      System.arraycopy(vector, 0, output.vector, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.vector[0] = vector[0];  // automatic conversion to double is done here
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    }
+    else {
+      System.arraycopy(vector, 0, output.vector, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  // Fill the column vector with the provided value
+  public void fill(int value) {
+    noNulls = true;
+    isRepeating = true;
+    vector[0] = value;
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating
+  // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+  // with many arguments.
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    flattenPush();
+    if (isRepeating) {
+      isRepeating = false;
+      int repeatVal = vector[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          vector[i] = repeatVal;
+        }
+      } else {
+        Arrays.fill(vector, 0, size, repeatVal);
+      }
+      flattenRepeatingNulls(selectedInUse, sel, size);
+    }
+    flattenNoNulls(selectedInUse, sel, size);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DateUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DateUtils.java
new file mode 100644
index 0000000..bbe064d
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DateUtils.java
@@ -0,0 +1,175 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+public final class DateUtils {
+  private static final int Y1 = 365;
+  private static final int Y4 = Y1 * 4 + 1;
+  private static final int Y100 = Y4 * 25 - 1;
+  private static final int Y400 = Y100 * 4 + 1;
+
+  // Years from 0001 to 1970: 1970 - 0001 = 1969 = 4 * 400 + 3 * 100 + 17 * 4 + 1,
+  // so the epoch falls this many days (719162) after 0001-01-01.
+  private static final long EPOCH_FROM_00010101 = 4 * Y400 + 3 * Y100 + 17 * Y4 + Y1;
+
+  // Years from 0000 to 1970: 1970 = 4 * 400 + 3 * 100 + 17 * 4 + 2 * 1
+  private static final long EPOCH_FROM_00000101 = 4 * Y400 + 3 * Y100 + 17 * Y4 + 2 * Y1;
+
+  /**
+   * Returns the year. For a date in the BC era it returns 1 - year; for example,
+   * 0 for 1 BC and -1 for 2 BC.
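+   * For example, getYear(0) returns 1970 (1970-01-01), and getYear(-719162)
+   * returns 1, since 0001-01-01 is 719162 days before the epoch.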
+   * @param daysSinceEpoch
+   * @return
+   */
+  public static int getYear(final long daysSinceEpoch) {
+    long d = daysSinceEpoch + EPOCH_FROM_00010101;
+    int offset = 0;
+    if (d < 0) {
+      offset = (int) (-d / Y400) + 1;
+      d += offset * Y400;
+    }
+    final int r400 = (int) (d % Y400);
+    final int q400 = (int) (d / Y400);
+    final int r100 = r400 % Y100;
+    final int q100 = r400 / Y100;
+    final int r4 = r100 % Y4;
+    final int q4 = r100 / Y4;
+    final int q1 = r4 / Y1;
+
+    return 1 + 400 * (q400 - offset) + 100 * q100 + 4 * q4 + q1 + ((q1 == 4 || q100 == 4) ? - 1 : 0);
+  }
+
+  private static final int NORMAL_JAN = 31;
+  private static final int NORMAL_FEB = NORMAL_JAN + 28;
+  private static final int NORMAL_MAR = NORMAL_FEB + 31;
+  private static final int NORMAL_APR = NORMAL_MAR + 30;
+  private static final int NORMAL_MAY = NORMAL_APR + 31;
+  private static final int NORMAL_JUN = NORMAL_MAY + 30;
+  private static final int NORMAL_JUL = NORMAL_JUN + 31;
+  private static final int NORMAL_AUG = NORMAL_JUL + 31;
+  private static final int NORMAL_SEP = NORMAL_AUG + 30;
+  private static final int NORMAL_OCT = NORMAL_SEP + 31;
+  private static final int NORMAL_NOV = NORMAL_OCT + 30;
+
+  private static final int LEAP_JAN = 31;
+  private static final int LEAP_FEB = LEAP_JAN + 29;
+  private static final int LEAP_MAR = LEAP_FEB + 31;
+  private static final int LEAP_APR = LEAP_MAR + 30;
+  private static final int LEAP_MAY = LEAP_APR + 31;
+  private static final int LEAP_JUN = LEAP_MAY + 30;
+  private static final int LEAP_JUL = LEAP_JUN + 31;
+  private static final int LEAP_AUG = LEAP_JUL + 31;
+  private static final int LEAP_SEP = LEAP_AUG + 30;
+  private static final int LEAP_OCT = LEAP_SEP + 31;
+  private static final int LEAP_NOV = LEAP_OCT + 30;
+
+  /**
+   * Returns the month, in the range 1 to 12.
+   * @param daysSinceEpoch
+   * @return
+   */
+  public static int getMonth(final long daysSinceEpoch) {
+    final int year = getYear(daysSinceEpoch);
+    final int dayOfYear = getDayOfYear(year, daysSinceEpoch);
+
+    if (isLeapYear(year)) {
+      if (dayOfYear < LEAP_JAN) return 1;
+      if (dayOfYear < LEAP_FEB) return 2;
+      if (dayOfYear < LEAP_MAR) return 3;
+      if (dayOfYear < LEAP_APR) return 4;
+      if (dayOfYear < LEAP_MAY) return 5;
+      if (dayOfYear < LEAP_JUN) return 6;
+      if (dayOfYear < LEAP_JUL) return 7;
+      if (dayOfYear < LEAP_AUG) return 8;
+      if (dayOfYear < LEAP_SEP) return 9;
+      if (dayOfYear < LEAP_OCT) return 10;
+      if (dayOfYear < LEAP_NOV) return 11;
+    } else {
+      if (dayOfYear < NORMAL_JAN) return 1;
+      if (dayOfYear < NORMAL_FEB) return 2;
+      if (dayOfYear < NORMAL_MAR) return 3;
+      if (dayOfYear < NORMAL_APR) return 4;
+      if (dayOfYear < NORMAL_MAY) return 5;
+      if (dayOfYear < NORMAL_JUN) return 6;
+      if (dayOfYear < NORMAL_JUL) return 7;
+      if (dayOfYear < NORMAL_AUG) return 8;
+      if (dayOfYear < NORMAL_SEP) return 9;
+      if (dayOfYear < NORMAL_OCT) return 10;
+      if (dayOfYear < NORMAL_NOV) return 11;
+    }
+    return 12;
+  }
+
+  /**
+   * Returns the day of the month, in the range 1 to 31.
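+   * For example, day 14455 since the epoch is 2009-07-30 (month 7), so this
+   * returns 30.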
+ * @param daysSinceEpoch + * @return + */ + public static int getDayOfMonth(final long daysSinceEpoch) { + final int year = getYear(daysSinceEpoch); + final int dayOfYear = getDayOfYear(year, daysSinceEpoch); + + int firstDayOfMonth = 0; + + if (isLeapYear(year)) { + if (dayOfYear >= LEAP_JAN) firstDayOfMonth = LEAP_JAN; + if (dayOfYear >= LEAP_FEB) firstDayOfMonth = LEAP_FEB; + if (dayOfYear >= LEAP_MAR) firstDayOfMonth = LEAP_MAR; + if (dayOfYear >= LEAP_APR) firstDayOfMonth = LEAP_APR; + if (dayOfYear >= LEAP_MAY) firstDayOfMonth = LEAP_MAY; + if (dayOfYear >= LEAP_JUN) firstDayOfMonth = LEAP_JUN; + if (dayOfYear >= LEAP_JUL) firstDayOfMonth = LEAP_JUL; + if (dayOfYear >= LEAP_AUG) firstDayOfMonth = LEAP_AUG; + if (dayOfYear >= LEAP_SEP) firstDayOfMonth = LEAP_SEP; + if (dayOfYear >= LEAP_OCT) firstDayOfMonth = LEAP_OCT; + if (dayOfYear >= LEAP_NOV) firstDayOfMonth = LEAP_NOV; + } else { + if (dayOfYear >= NORMAL_JAN) firstDayOfMonth = NORMAL_JAN; + if (dayOfYear >= NORMAL_FEB) firstDayOfMonth = NORMAL_FEB; + if (dayOfYear >= NORMAL_MAR) firstDayOfMonth = NORMAL_MAR; + if (dayOfYear >= NORMAL_APR) firstDayOfMonth = NORMAL_APR; + if (dayOfYear >= NORMAL_MAY) firstDayOfMonth = NORMAL_MAY; + if (dayOfYear >= NORMAL_JUN) firstDayOfMonth = NORMAL_JUN; + if (dayOfYear >= NORMAL_JUL) firstDayOfMonth = NORMAL_JUL; + if (dayOfYear >= NORMAL_AUG) firstDayOfMonth = NORMAL_AUG; + if (dayOfYear >= NORMAL_SEP) firstDayOfMonth = NORMAL_SEP; + if (dayOfYear >= NORMAL_OCT) firstDayOfMonth = NORMAL_OCT; + if (dayOfYear >= NORMAL_NOV) firstDayOfMonth = NORMAL_NOV; + } + + return dayOfYear - firstDayOfMonth + 1; + } + + private static int getDayOfYear(int year, long daysSinceEpoch) { + int offset = 0; + if (year < 0) { + offset = ((-year / 400) + 1); + } + year += 400 * offset; + daysSinceEpoch += Y400 * offset; + final int lastYear = year - 1; + final int firstDayOfYear = Y1 * year + lastYear / 4 - lastYear / 100 + lastYear / 400; + return (int) (daysSinceEpoch + EPOCH_FROM_00000101 - firstDayOfYear); + } + + private static boolean isLeapYear(final int year) { + if (year % 400 == 0) return true; + if (year % 100 == 0) return false; + if (year % 4 == 0) return true; + return false; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java index d1a75df..ace51bb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -104,6 +105,13 @@ protected void assignLong(long value, int destIndex) { } } + private static abstract class VectorDateColumnAssign + extends VectorColumnAssignVectorBase { + protected void assignDate(int value, int destIndex) { + outCol.vector[destIndex] = value; + } + } + private static abstract class VectorDoubleColumnAssign extends VectorColumnAssignVectorBase { @@ -150,6 +158,14 @@ public void reset() { if (cv == null) { continue; } + else if (cv instanceof DateColumnVector) { + vca[i] = new VectorDateColumnAssign() { + @Override + protected void copyValue(DateColumnVector in, int srcIndex, int 
destIndex) { + assignDate(in.vector[srcIndex], destIndex); + } + }.init(outputBatch, (DateColumnVector) cv); + } else if (cv instanceof LongColumnVector) { vca[i] = new VectorLongColumnAssign() { @Override @@ -336,6 +352,20 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException { poi.getPrimitiveCategory()); } } + else if (destCol instanceof DateColumnVector) { + outVCA = new VectorDateColumnAssign() { + @Override + public void assignObjectValue(Object val, int destIndex) throws HiveException { + if (val == null) { + assignNull(destIndex); + } + else { + DateWritable bw = (DateWritable) val; + assignDate(bw.getDays(), destIndex); + } + } + }.init(outputBatch, (DateColumnVector) destCol); + } else { throw new HiveException("Unknown vector column type " + destCol.getClass().getName()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java index 1c70387..dfa2ba2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java @@ -35,6 +35,7 @@ DOUBLE(2), STRING(3), DECIMAL(4), + DATE(5), ANY(7); private final int value; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java index f083d86..b21b2ad 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java @@ -36,6 +36,7 @@ public class VectorHashKeyWrapper extends KeyWrapper { private long[] longValues; + private int[] dateValues; private double[] doubleValues; private byte[][] byteValues; @@ -45,13 +46,14 @@ private boolean[] isNull; private int hashcode; - public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, int byteValuesCount) { + public VectorHashKeyWrapper(int longValuesCount, int dateValuesCount, int doubleValuesCount, int byteValuesCount) { longValues = new long[longValuesCount]; + dateValues = new int[dateValuesCount]; doubleValues = new double[doubleValuesCount]; byteValues = new byte[byteValuesCount][]; byteStarts = new int[byteValuesCount]; byteLengths = new int[byteValuesCount]; - isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount]; + isNull = new boolean[longValuesCount + dateValuesCount + doubleValuesCount + byteValuesCount]; } private VectorHashKeyWrapper() { @@ -65,6 +67,7 @@ public void getNewKey(Object row, ObjectInspector rowInspector) throws HiveExcep @Override public void setHashKey() { hashcode = Arrays.hashCode(longValues) ^ + Arrays.hashCode(dateValues) ^ Arrays.hashCode(doubleValues) ^ Arrays.hashCode(isNull); @@ -74,7 +77,7 @@ public void setHashKey() { * Hashing the string is potentially expensive so is better to branch. * Additionally not looking at values for nulls allows us not reset the values. 
 */
      if (!isNull[longValues.length + doubleValues.length + i]) {
        byte[] bytes = byteValues[i];
        int start = byteStarts[i];
        int length = byteLengths[i];
@@ -103,6 +106,7 @@ public boolean equals(Object that) {
       VectorHashKeyWrapper keyThat = (VectorHashKeyWrapper)that;
       return hashcode == keyThat.hashcode &&
           Arrays.equals(longValues, keyThat.longValues) &&
+          Arrays.equals(dateValues, keyThat.dateValues) &&
           Arrays.equals(doubleValues, keyThat.doubleValues) &&
           Arrays.equals(isNull, keyThat.isNull) &&
           byteValues.length == keyThat.byteValues.length &&
@@ -134,6 +138,7 @@ private boolean bytesEquals(VectorHashKeyWrapper keyThat) {
   protected Object clone() {
     VectorHashKeyWrapper clone = new VectorHashKeyWrapper();
     clone.longValues = longValues.clone();
+    clone.dateValues = dateValues.clone();
     clone.doubleValues = doubleValues.clone();
     clone.isNull = isNull.clone();
@@ -142,7 +147,7 @@ protected Object clone() {
     clone.byteLengths = byteLengths.clone();
     for (int i = 0; i < byteValues.length; ++i) {
       // avoid allocation/copy of nulls, because it potentially expensive. branch instead.
      if (!isNull[longValues.length + doubleValues.length + i]) {
        clone.byteValues[i] = Arrays.copyOfRange(
            byteValues[i],
            byteStarts[i],
@@ -201,11 +206,22 @@ public void assignNullString(int index) {
     isNull[longValues.length + doubleValues.length + index] = true;
   }
 
+  // The date entries sit at the end of isNull, after the long, double and byte
+  // entries, so the offsets used by the existing assign methods stay valid.
+  public void assignDate(int index, int v) {
+    dateValues[index] = v;
+    isNull[longValues.length + doubleValues.length + byteValues.length + index] = false;
+  }
+
+  public void assignNullDate(int index) {
+    dateValues[index] = 0; // assign 0 to simplify hashcode
+    isNull[longValues.length + doubleValues.length + byteValues.length + index] = true;
+  }
+
   @Override
   public String toString() {
-    return String.format("%d[%s] %d[%s] %d[%s]",
+    return String.format("%d[%s] %d[%s] %d[%s] %d[%s]",
         longValues.length, Arrays.toString(longValues),
+        dateValues.length, Arrays.toString(dateValues),
         doubleValues.length, Arrays.toString(doubleValues),
         byteValues.length, Arrays.toString(byteValues));
   }
@@ -222,7 +238,10 @@ public boolean getIsBytesNull(int i) {
     return isNull[longValues.length + doubleValues.length + i];
   }
-
+  public boolean getIsDateNull(int i) {
+    return isNull[longValues.length + doubleValues.length + byteValues.length + i];
+  }
+
   public long getLongValue(int i) {
     return longValues[i];
   }
@@ -243,6 +262,10 @@ public int getByteLength(int i) {
     return byteLengths[i];
   }
 
+  public int getDateValue(int i) {
+    return dateValues[i];
+  }
+
   public int getVariableSize() {
     int variableSize = 0;
     for (int i=0; i= 0) {
       return kw.getIsLongNull(klh.longIndex) ? null :
           keyOutputWriter.writeValue(kw.getLongValue(klh.longIndex));
+    } else if (klh.dateIndex >= 0) {
+      return kw.getIsDateNull(klh.dateIndex) ? null :
+          keyOutputWriter.writeValue(kw.getDateValue(klh.dateIndex));
     } else if (klh.doubleIndex >= 0) {
       return kw.getIsDoubleNull(klh.doubleIndex) ?
null : keyOutputWriter.writeValue(kw.getDoubleValue(klh.doubleIndex)); @@ -531,8 +649,8 @@ public Object getWritableKeyValue(VectorHashKeyWrapper kw, int i, kw.getByteLength(klh.stringIndex)); } else { throw new HiveException(String.format( - "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d", - i, klh.longIndex, klh.doubleIndex, klh.stringIndex)); + "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d %d", + i, klh.longIndex, klh.dateIndex, klh.doubleIndex, klh.stringIndex)); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index f5ab731..fbdf8b0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -885,6 +885,10 @@ public static boolean isDatetimeFamily(String resultType) { return resultType.equalsIgnoreCase("timestamp"); } + public static boolean isDateFamily(String resultType) { + return resultType.equalsIgnoreCase("date"); + } + // return true if this is any kind of float public static boolean isFloatFamily(String resultType) { return resultType.equalsIgnoreCase("double") @@ -913,6 +917,9 @@ public static String mapJavaTypeToVectorType(String javaType) isDatetimeFamily(javaType)) { return "bigint"; } + if (isDateFamily(javaType)) { + return "date"; + } throw new HiveException("Unsuported type for vectorization: " + javaType); } @@ -1029,6 +1036,8 @@ static String getNormalizedTypeName(String colType) { normalizedType = "Double"; } else if (colType.equalsIgnoreCase("String")) { normalizedType = "String"; + } else if (colType.equalsIgnoreCase("Date")) { + normalizedType = "Date"; } else { normalizedType = "Long"; } @@ -1114,6 +1123,7 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc) {"Double", DoubleColumnVector.class}, {"Long", LongColumnVector.class}, {"String", BytesColumnVector.class}, + {"Date", DateColumnVector.class}, }; public Map getOutputColumnTypeMap() { @@ -1134,6 +1144,8 @@ public static ColumnVector allocateColumnVector(String type, int defaultSize) { return new DoubleColumnVector(defaultSize); } else if (isStringFamily(type)) { return new BytesColumnVector(defaultSize); + } else if (isDateFamily(type)) { + return new DateColumnVector(defaultSize); } else { return new LongColumnVector(defaultSize); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 80bf671..679aec0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -147,6 +148,17 @@ public static void AddRowToBatch(Object row, StructObjectInspector oi, int rowIn } } break; + case DATE: { + DateColumnVector dcv = (DateColumnVector) batch.cols[i]; + if (writableCol != null) { + dcv.vector[rowIndex] = ((DateWritable) writableCol).getDays(); + dcv.isNull[rowIndex] = false; + } else { + dcv.vector[rowIndex] = 1; + SetNullColIsNullValue(dcv, rowIndex); + } + } + break; 
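+        // Note: the 1 written above for a null row is just the NULL_VALUE
+        // placeholder; consumers must check isNull before using the value.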
case FLOAT: { DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[i]; if (writableCol != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java index 69553d9..6be8111 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java @@ -29,7 +29,9 @@ import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazy.LazyDate; import org.apache.hadoop.hive.serde2.lazy.LazyLong; import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; @@ -166,6 +168,12 @@ public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspe tw.set(t); LazyTimestamp.writeUTF8(serializeVectorStream, tw); break; + case DATE: + DateColumnVector dacv = (DateColumnVector) batch.cols[k]; + DateWritable daw = new DateWritable(); + daw.set(dacv.vector[rowIndex]); + LazyDate.writeUTF8(serializeVectorStream, daw); + break; default: throw new UnsupportedOperationException( "Vectorizaton is not supported for datatype:" diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index f513188..e81676f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -233,7 +233,7 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi; // Vectorization currently only supports the following data types: - // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING and TIMESTAMP + // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP and DATE switch (poi.getPrimitiveCategory()) { case BOOLEAN: case BYTE: @@ -250,6 +250,9 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException case STRING: result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); break; + case DATE: + result.cols[j] = new DateColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + break; default: throw new RuntimeException("Vectorizaton is not supported for datatype:" + poi.getPrimitiveCategory()); @@ -367,6 +370,8 @@ private ColumnVector allocateColumnVector(String type, int defaultSize) { return new DoubleColumnVector(defaultSize); } else if (type.equalsIgnoreCase("string")) { return new BytesColumnVector(defaultSize); + } else if (type.equalsIgnoreCase("date")) { + return new DateColumnVector(defaultSize); } else { return new LongColumnVector(defaultSize); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index a242fef..be97abe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -21,6 +21,7 @@ import java.sql.Timestamp; import 
java.util.ArrayList;
 import java.util.Arrays;
+import java.sql.Date;
 import java.util.List;
 
 import org.apache.commons.lang.ArrayUtils;
@@ -28,12 +29,14 @@
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -247,6 +250,56 @@ public Object setValue(Object field, ColumnVector column, int row) throws HiveEx
   }
 
   /**
+   * Specialized writer for DateColumnVector. Will throw cast exception
+   * if the wrong vector column is used.
+   */
+  private static abstract class VectorExpressionWriterDate extends VectorExpressionWriterBase {
+    @Override
+    public Object writeValue(ColumnVector column, int row) throws HiveException {
+      DateColumnVector dcv = (DateColumnVector) column;
+      if (dcv.noNulls && !dcv.isRepeating) {
+        return writeValue(dcv.vector[row]);
+      } else if (dcv.noNulls && dcv.isRepeating) {
+        return writeValue(dcv.vector[0]);
+      } else if (!dcv.noNulls && !dcv.isRepeating && !dcv.isNull[row]) {
+        return writeValue(dcv.vector[row]);
+      } else if (!dcv.noNulls && !dcv.isRepeating && dcv.isNull[row]) {
+        return null;
+      } else if (!dcv.noNulls && dcv.isRepeating && !dcv.isNull[0]) {
+        return writeValue(dcv.vector[0]);
+      } else if (!dcv.noNulls && dcv.isRepeating && dcv.isNull[0]) {
+        return null;
+      }
+      throw new HiveException(
+        String.format(
+          "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b",
+          row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0]));
+    }
+
+    @Override
+    public Object setValue(Object field, ColumnVector column, int row) throws HiveException {
+      DateColumnVector dcv = (DateColumnVector) column;
+      if (dcv.noNulls && !dcv.isRepeating) {
+        return setValue(field, dcv.vector[row]);
+      } else if (dcv.noNulls && dcv.isRepeating) {
+        return setValue(field, dcv.vector[0]);
+      } else if (!dcv.noNulls && !dcv.isRepeating && !dcv.isNull[row]) {
+        return setValue(field, dcv.vector[row]);
+      } else if (!dcv.noNulls && !dcv.isRepeating && dcv.isNull[row]) {
+        return null;
+      } else if (!dcv.noNulls && dcv.isRepeating && !dcv.isNull[0]) {
+        return setValue(field, dcv.vector[0]);
+      } else if (!dcv.noNulls && dcv.isRepeating && dcv.isNull[0]) {
+        return null;
+      }
+      throw new HiveException(
+        String.format(
+          "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b",
+          row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0]));
+    }
+  }
+
+  /**
+   * Specialized writer for BytesColumnVector. Will throw cast exception
+   * if the wrong vector column is used.
*/ @@ -389,9 +442,41 @@ private static VectorExpressionWriter genVectorExpressionWritableDecimal( private static VectorExpressionWriter genVectorExpressionWritableDate( SettableDateObjectInspector fieldObjInspector) throws HiveException { - // We should never reach this, the compile validation should guard us - throw new HiveException("DATE primitive type not supported in vectorization."); - } + return new VectorExpressionWriterDate() { + private Object obj; + private Date d; + + public VectorExpressionWriter init(SettableDateObjectInspector objInspector) + throws HiveException { + super.init(objInspector); + d = new Date(0); + obj = initValue(null); + return this; + } + + @Override + public Object writeValue(long value) { + d.setTime(DateWritable.daysToMillis((int) value)); + ((SettableDateObjectInspector) this.objectInspector).set(obj, d); + return obj; + } + + @Override + public Object setValue(Object field, long value) { + d.setTime(DateWritable.daysToMillis((int) value)); + if (null == field) { + field = initValue(null); + } + ((SettableDateObjectInspector) this.objectInspector).set(field, d); + return field; + } + + @Override + public Object initValue(Object ignored) { + return ((SettableDateObjectInspector) this.objectInspector).create(new Date(0)); + } + }.init(fieldObjInspector); + } private static VectorExpressionWriter genVectorExpressionWritableTimestamp( SettableTimestampObjectInspector fieldObjInspector) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthDate.java new file mode 100644 index 0000000..59a51c9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthDate.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DateUtils; + +import java.util.Calendar; + +/** + * Expression to get day of month. 
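+ * For DATE columns the field is computed arithmetically from the stored day
+ * count via {@link org.apache.hadoop.hive.ql.exec.vector.DateUtils}, with no
+ * per-row Calendar lookup.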
+ * Extends {@link VectorUDFTimestampFieldDate}
+ */
+
+public class VectorUDFDayOfMonthDate extends VectorUDFTimestampFieldDate {
+
+  private static final long serialVersionUID = 1L;
+
+  public VectorUDFDayOfMonthDate(int colNum, int outputColumn) {
+    super(Calendar.DAY_OF_MONTH, colNum, outputColumn);
+  }
+
+  public VectorUDFDayOfMonthDate() {
+    super();
+  }
+
+  @Override
+  protected long getField(long days) {
+    return DateUtils.getDayOfMonth(days);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthDate.java
new file mode 100644
index 0000000..b754fbd
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthDate.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.DateUtils;
+
+import java.util.Calendar;
+
+/**
+ * Expression to get the month.
+ * Extends {@link VectorUDFTimestampFieldDate}
+ */
+public class VectorUDFMonthDate extends VectorUDFTimestampFieldDate {
+
+  private static final long serialVersionUID = 1L;
+
+  public VectorUDFMonthDate(int colNum, int outputColumn) {
+    super(Calendar.MONTH, colNum, outputColumn);
+  }
+
+  public VectorUDFMonthDate() {
+    super();
+  }
+
+  @Override
+  protected long getField(long days) {
+    return DateUtils.getMonth(days);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java
new file mode 100644
index 0000000..9ae5373
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+
+import java.util.Calendar;
+
+/**
+ * Abstract class to return various fields from a Date.
+ */
+public abstract class VectorUDFTimestampFieldDate extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int colNum;
+  protected int outputColumn;
+  protected int field;
+  protected transient final Calendar calendar = Calendar.getInstance();
+
+  public VectorUDFTimestampFieldDate(int field, int colNum, int outputColumn) {
+    this();
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+    this.field = field;
+  }
+
+  public VectorUDFTimestampFieldDate() {
+    super();
+  }
+
+  protected long getField(long days) {
+    calendar.setTimeInMillis(DateWritable.daysToMillis((int) days));
+    return calendar.get(field);
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    DateColumnVector inputCol = (DateColumnVector)batch.cols[this.colNum];
+    /* the code below is identical to the Long and String variants of this expression */
+    final int n = inputCol.isRepeating ? 1 : batch.size;
+    int[] sel = batch.selected;
+
+    if(batch.size == 0) {
+      /* n != batch.size when isRepeating, so check batch.size here */
+      return;
+    }
+
+    /* true for all algebraic UDFs with no state */
+    outV.isRepeating = inputCol.isRepeating;
+
+    if (inputCol.noNulls) {
+      outV.noNulls = true;
+      if (batch.selectedInUse) {
+        for(int j=0; j < n; j++) {
+          int i = sel[j];
+          outV.vector[i] = getField(inputCol.vector[i]);
+        }
+      } else {
+        for(int i = 0; i < n; i++) {
+          outV.vector[i] = getField(inputCol.vector[i]);
+        }
+      }
+    } else {
+      // Handle case with nulls. Don't do function if the value is null, to save time,
+      // because calling the function can be expensive.
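+      // (Readers consult outV.isNull only when noNulls is false, so null rows
+      // can safely leave outV.vector[i] untouched.)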
+ outV.noNulls = false; + if (batch.selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = getField(inputCol.vector[i]); + } + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = getField(inputCol.vector[i]); + } + } + } + } + } + + @Override + public int getOutputColumn() { + return this.outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public int getField() { + return field; + } + + public void setField(int field) { + this.field = field; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DATE) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampDate.java new file mode 100644 index 0000000..1ce982b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampDate.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Return Unix Timestamp. 
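+ * For a day count d the result is DateWritable.daysToMillis(d) / 1000, i.e.
+ * the epoch seconds at 00:00:00 of that day.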
+ * Extends {@link VectorUDFTimestampFieldDate} + */ +public class VectorUDFUnixTimeStampDate extends VectorUDFTimestampFieldDate { + + private static final long serialVersionUID = 1L; + + public VectorUDFUnixTimeStampDate(int colNum, int outputColumn) { + super(-1, colNum, outputColumn); + } + + public VectorUDFUnixTimeStampDate() { + super(); + } + + @Override + protected long getField(long days) { + return DateWritable.daysToMillis((int) days) / 1000; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearDate.java new file mode 100644 index 0000000..87d691d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearDate.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +public class VectorUDFWeekOfYearDate extends VectorUDFTimestampFieldDate { + + private static final long serialVersionUID = 1L; + + public VectorUDFWeekOfYearDate(int colNum, int outputColumn) { + super(Calendar.WEEK_OF_YEAR, colNum, outputColumn); + initCalendar(); + } + + public VectorUDFWeekOfYearDate() { + super(); + initCalendar(); + } + + private void initCalendar() { + /* code copied over from UDFWeekOfYear implementation */ + calendar.setFirstDayOfWeek(Calendar.MONDAY); + calendar.setMinimalDaysInFirstWeek(4); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearDate.java new file mode 100644 index 0000000..03af41a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearDate.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.DateUtils;
+
+import java.util.Calendar;
+
+/**
+ * Expression to get year as a long.
+ * Extends {@link VectorUDFTimestampFieldDate}
+ */
+public class VectorUDFYearDate extends VectorUDFTimestampFieldDate {
+
+  private static final long serialVersionUID = 1L;
+
+  public VectorUDFYearDate(int colNum, int outputColumn) {
+    super(Calendar.YEAR, colNum, outputColumn);
+  }
+
+  public VectorUDFYearDate() {
+    super();
+  }
+
+  @Override
+  protected long getField(long days) {
+    return DateUtils.getYear(days);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFZeroFieldDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFZeroFieldDate.java
new file mode 100644
index 0000000..c4660c7
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFZeroFieldDate.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+/**
+ * Returns a zero value. For example, the hour, minute, and second fields of a
+ * date value always return 0.
+ */
+public class VectorUDFZeroFieldDate extends VectorUDFTimestampFieldDate {
+
+  private static final long serialVersionUID = 1L;
+
+  public VectorUDFZeroFieldDate() {
+    super();
+  }
+
+  public VectorUDFZeroFieldDate(int colNum, int outputColumn) {
+    super(-1, colNum, outputColumn);
+  }
+
+  @Override
+  protected long getField(long days) {
+    return 0;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
index 3bc9493..3ec512b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.exec.vector.udf;
 
+import java.sql.Date;
 import java.sql.Timestamp;
 
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -27,9 +28,11 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
@@ -234,6 +237,13 @@ private void setOutputCol(ColumnVector colVec, int i, Object value) {
     } else {
       lv.vector[i] = ((WritableLongObjectInspector) outputOI).get(value);
     }
+  } else if (outputOI instanceof WritableDateObjectInspector) {
+    DateColumnVector dcv = (DateColumnVector) colVec;
+    if (value instanceof Date) {
+      dcv.vector[i] = DateWritable.dateToDays((Date) value);
+    } else {
+      dcv.vector[i] = ((WritableDateObjectInspector) outputOI).getPrimitiveWritableObject(value).getDays();
+    }
   } else if (outputOI instanceof WritableDoubleObjectInspector) {
     DoubleColumnVector dv = (DoubleColumnVector) colVec;
     if (value instanceof Double) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index c3c9685..5b3ddff 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -969,6 +969,7 @@ void skipRows(long items) throws IOException {
   private static class DateTreeReader extends TreeReader{
     private IntegerReader reader = null;
+    // The integer reader produces longs, so read into a long vector first and
+    // narrow into the DateColumnVector below.
+    private LongColumnVector backingVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
 
     DateTreeReader(Path path, int columnId) {
       super(path, columnId);
@@ -1016,18 +1017,25 @@ Object next(Object previous) throws IOException {
 
     @Override
     Object nextVector(Object previousVector, long batchSize) throws IOException {
-      LongColumnVector result = null;
+      DateColumnVector result = null;
       if (previousVector == null) {
-        result = new LongColumnVector();
+        result = new DateColumnVector();
       } else {
-        result = (LongColumnVector) previousVector;
+        result = (DateColumnVector) previousVector;
       }
 
       // Read present/isNull stream
-      super.nextVector(result, batchSize);
+      super.nextVector(backingVector, batchSize);
 
       // Read value entries based on isNull entries
-      reader.nextVector(result, batchSize);
+      reader.nextVector(backingVector, batchSize);
+
+      result.isRepeating = backingVector.isRepeating;
+      result.noNulls = backingVector.noNulls;
+      // Only the first batchSize entries are valid for this batch.
+      for (int i = 0; i < batchSize; i++) {
+        result.isNull[i] = backingVector.isNull[i];
+        result.vector[i] = (int) backingVector.vector[i];
+      }
       return result;
     }
@@ -2428,6 +2436,7 @@ static int getIndexPosition(OrcProto.ColumnEncoding.Kind encoding,
     case LONG:
     case FLOAT:
     case DOUBLE:
+    case DATE:
     case STRUCT:
     case MAP:
     case LIST:
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index ad96fa5..03d7dda 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -188,6 +188,7 @@ public Vectorizer() {
     supportedDataTypes.add("byte");
     supportedDataTypes.add("float");
     supportedDataTypes.add("double");
+    supportedDataTypes.add("date");
 
     supportedGenericUDFs.add(GenericUDFOPPlus.class);
     supportedGenericUDFs.add(GenericUDFOPMinus.class);
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java
index 20add85..85d6e92 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java
@@ -22,10 +22,12 @@
 import java.text.SimpleDateFormat;
 import java.util.Calendar;
 import java.util.Date;
+import java.util.GregorianCalendar;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthDate;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthString;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -43,14 +45,21 @@
 + "'yyyy-MM-dd'.\n" + "Example:\n " + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " 30")
-@VectorizedExpressions({VectorUDFDayOfMonthLong.class, VectorUDFDayOfMonthString.class})
+@VectorizedExpressions({VectorUDFDayOfMonthDate.class, VectorUDFDayOfMonthLong.class, VectorUDFDayOfMonthString.class})
 public class UDFDayOfMonth extends UDF {
   private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
-  private final Calendar calendar = Calendar.getInstance();
+
+  // ISO 8601 uses the Gregorian calendar only.
+  // See also http://en.wikipedia.org/wiki/ISO_8601
+  private final GregorianCalendar calendar = new GregorianCalendar();
   private final IntWritable result = new IntWritable();
 
   public UDFDayOfMonth() {
+    // A default GregorianCalendar uses the Julian calendar before 1582-10-15; this
+    // setting forces the Gregorian calendar for the entire time range.
+    // See also http://docs.oracle.com/javase/7/docs/api/java/util/GregorianCalendar.html#setGregorianChange(java.util.Date)
+    calendar.setGregorianChange(new Date(Long.MIN_VALUE));
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
index 4e34dbf..cbb3538 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
@@ -22,10 +22,12 @@
 import java.text.SimpleDateFormat;
 import java.util.Calendar;
 import java.util.Date;
+import java.util.GregorianCalendar;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthDate;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthString;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -41,14 +43,21 @@
 value = "_FUNC_(date) - Returns the month of date", extended = "Example:\n" + " > SELECT _FUNC_('2009-30-07') FROM src LIMIT 1;\n" + " 7")
-@VectorizedExpressions({VectorUDFMonthLong.class, VectorUDFMonthString.class})
+@VectorizedExpressions({VectorUDFMonthDate.class, VectorUDFMonthLong.class, VectorUDFMonthString.class})
 public class UDFMonth extends UDF {
   private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
-  private final Calendar calendar = Calendar.getInstance();
+
+  // ISO 8601 uses the Gregorian calendar only.
+  // See also http://en.wikipedia.org/wiki/ISO_8601
+  private final GregorianCalendar calendar = new GregorianCalendar();
   private IntWritable result = new IntWritable();
 
   public UDFMonth() {
+    // A default GregorianCalendar uses the Julian calendar before 1582-10-15; this
+    // setting forces the Gregorian calendar for the entire time range.
+    // See also http://docs.oracle.com/javase/7/docs/api/java/util/GregorianCalendar.html#setGregorianChange(java.util.Date)
+    calendar.setGregorianChange(new Date(Long.MIN_VALUE));
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java
index f076d1d..568db27 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java
@@ -26,6 +26,7 @@
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearDate;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearString;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -44,7 +45,7 @@
 + " > SELECT _FUNC_('2008-02-20') FROM src LIMIT 1;\n" + " 8\n" + " > SELECT _FUNC_('1980-12-31 12:59:59') FROM src LIMIT 1;\n" + " 1")
-@VectorizedExpressions({VectorUDFWeekOfYearLong.class, VectorUDFWeekOfYearString.class})
+@VectorizedExpressions({VectorUDFWeekOfYearDate.class, VectorUDFWeekOfYearLong.class, VectorUDFWeekOfYearString.class})
 public class UDFWeekOfYear extends UDF {
   private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
   private final Calendar calendar = Calendar.getInstance();
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
index 1853860..97c3d68 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
@@ -22,10 +22,12 @@
 import java.text.SimpleDateFormat;
 import java.util.Calendar;
 import java.util.Date;
+import java.util.GregorianCalendar;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearDate;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearString;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -43,18 +45,27 @@
 + "'yyyy-MM-dd'.\n" + "Example:\n " + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " 2009")
-@VectorizedExpressions({VectorUDFYearLong.class, VectorUDFYearString.class})
+@VectorizedExpressions({VectorUDFYearDate.class, VectorUDFYearLong.class, VectorUDFYearString.class})
 public class UDFYear extends UDF {
   private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
-  private final Calendar calendar = Calendar.getInstance();
+
+  // ISO 8601 uses the Gregorian calendar only.
+  // See also http://en.wikipedia.org/wiki/ISO_8601
+  private final GregorianCalendar calendar = new GregorianCalendar();
   private final IntWritable result = new IntWritable();
 
   public UDFYear() {
+    // A default GregorianCalendar uses the Julian calendar before 1582-10-15; this
+    // setting forces the Gregorian calendar for the entire time range.
+    // See also http://docs.oracle.com/javase/7/docs/api/java/util/GregorianCalendar.html#setGregorianChange(java.util.Date)
+    calendar.setGregorianChange(new Date(Long.MIN_VALUE));
   }
 
   /**
-   * Get the year from a date string.
+   * Get the year from a date string. ISO 8601 uses astronomical
+   * year numbering.
In ISO 8601, 0001 is equal to 1 AD, 0002 is equal to 2 AD, ..., and 0000 is equal to 1 BC, + * -0001 is equal to 2 BC, and so on. * * @param dateString * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or @@ -71,7 +82,11 @@ public IntWritable evaluate(Text dateString) { try { Date date = formatter.parse(dateString.toString()); calendar.setTime(date); - result.set(calendar.get(Calendar.YEAR)); + if (calendar.get(Calendar.ERA) == GregorianCalendar.AD) { + result.set(calendar.get(Calendar.YEAR)); + } else { + result.set(-calendar.get(Calendar.YEAR) + 1); + } return result; } catch (ParseException e) { return null; } @@ -84,7 +99,11 @@ public IntWritable evaluate(DateWritable d) { } calendar.setTime(d.get()); - result.set(calendar.get(Calendar.YEAR)); + if (calendar.get(Calendar.ERA) == GregorianCalendar.AD) { + result.set(calendar.get(Calendar.YEAR)); + } else { + result.set(-calendar.get(Calendar.YEAR) + 1); + } return result; } @@ -94,7 +113,11 @@ public IntWritable evaluate(TimestampWritable t) { } calendar.setTime(t.getTimestamp()); - result.set(calendar.get(Calendar.YEAR)); + if (calendar.get(Calendar.ERA) == GregorianCalendar.AD) { + result.set(calendar.get(Calendar.YEAR)); + } else { + result.set(-calendar.get(Calendar.YEAR) + 1); + } return result; }
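The BC-era branch above is easy to get wrong, so here is a self-contained sketch of the two calendar details the UDFYear change relies on: pushing the Gregorian cutover back so the proleptic Gregorian calendar applies before 1582-10-15, and mapping an era-based year to ISO 8601 astronomical numbering. This is illustrative only, not part of the patch; the class name and sample date are invented.

import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;

public class AstronomicalYearDemo {
  public static void main(String[] args) {
    GregorianCalendar calendar = new GregorianCalendar();
    // Push the Julian->Gregorian cutover back to the beginning of time,
    // so Gregorian rules also apply to dates before 1582-10-15.
    calendar.setGregorianChange(new Date(Long.MIN_VALUE));

    calendar.clear();
    calendar.set(Calendar.ERA, GregorianCalendar.BC);
    calendar.set(2, Calendar.JANUARY, 1); // January 1st, 2 BC

    // ISO 8601 astronomical numbering: 1 BC -> 0, 2 BC -> -1, ...
    int year = calendar.get(Calendar.YEAR);
    if (calendar.get(Calendar.ERA) == GregorianCalendar.BC) {
      year = 1 - year;
    }
    System.out.println(year); // prints -1
  }
}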
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java index dc259c6..29b7bdb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampString; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -43,7 +44,8 @@ @Description(name = "to_unix_timestamp", value = "_FUNC_(date[, pattern]) - Returns the UNIX timestamp", extended = "Converts the specified time to number of seconds since 1970-01-01.") -@VectorizedExpressions({VectorUDFUnixTimeStampLong.class, VectorUDFUnixTimeStampString.class}) +@VectorizedExpressions({VectorUDFUnixTimeStampDate.class, VectorUDFUnixTimeStampLong.class, + VectorUDFUnixTimeStampString.class}) public class GenericUDFToUnixTimeStamp extends GenericUDF { private transient StringObjectInspector intputTextOI; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index 454a02d..0d7d5da 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUpper; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList; @@ -169,6 +170,13 @@ public void testVectorExpressionDescriptor() { .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); IsNull v5 = new IsNull(); Assert.assertEquals(d5, v5.getDescriptor()); + + VectorExpressionDescriptor.Builder builder6 = new VectorExpressionDescriptor.Builder(); + VectorExpressionDescriptor.Descriptor d6 = builder6.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1).setArgumentTypes(VectorExpressionDescriptor.ArgumentType.DATE) + .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + VectorUDFWeekOfYearDate v6 = new VectorUDFWeekOfYearDate(); + Assert.assertEquals(d6, v6.getDescriptor()); + } @Test
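The new test class below leans on one representation fact: a vectorized DATE value is an int count of days since the Unix epoch (1970-01-01), which its toTimestampWritable() helper turns back into a java.sql.Timestamp via DateWritable.daysToMillis. A minimal sketch of the idea (illustrative only; unlike daysToMillis it skips the local-time-zone adjustment):

import java.sql.Timestamp;
import java.util.concurrent.TimeUnit;

public class DaysSinceEpochDemo {
  public static void main(String[] args) {
    int days = 14902; // 2010-10-20, counting 1970-01-01 as day 0
    // Naive conversion to UTC midnight of that day; Timestamp.toString()
    // then renders the instant in the JVM's default time zone.
    Timestamp ts = new Timestamp(TimeUnit.DAYS.toMillis(days));
    System.out.println(ts);
  }
}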
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java new file mode 100644 index 0000000..8a2cdd5 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java @@ -0,0 +1,472 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import junit.framework.Assert; +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateUtils; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; +import org.apache.hadoop.hive.ql.udf.UDFMonth; +import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; +import org.apache.hadoop.hive.ql.udf.UDFYear; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Test; + +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; +import java.util.Random; + +public class TestVectorDateExpressions { + /* copied over from VectorUDFTimestampFieldLong */ + private TimestampWritable toTimestampWritable(long daysSinceEpoch) { + Timestamp ts = new Timestamp(DateWritable.daysToMillis((int) daysSinceEpoch)); + return new TimestampWritable(ts); + } + + private int[] getAllBoundaries() { + List<Integer> boundaries = new ArrayList<Integer>(); + Calendar c = Calendar.getInstance(); + c.setTimeInMillis(0); // c.set doesn't reset millis + for (int year = 1902; year <= 2038; year++) { + c.set(year, Calendar.JANUARY, 1, 0, 0, 0); + int exactly = (int) (c.getTimeInMillis() / (24 * 60 * 60 * 1000)); + int before = exactly - 1; + int after = exactly + 1; + boundaries.add(Integer.valueOf(before)); + boundaries.add(Integer.valueOf(exactly)); + boundaries.add(Integer.valueOf(after)); + } + Integer[] indices = boundaries.toArray(new Integer[0]); + return ArrayUtils.toPrimitive(indices); + } + + private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size) { + VectorizedRowBatch batch = new VectorizedRowBatch(2, size); + DateColumnVector lcv = new DateColumnVector(size); + Random rand = new Random(seed); + for (int i = 0; i < size; i++) { + lcv.vector[i] = (rand.nextInt()); + } + batch.cols[0] = lcv; + batch.cols[1] = new LongColumnVector(size); + batch.size = size; + return batch; + } + + /* + * Input array is used to fill the entire size of the vector row batch + */ + private VectorizedRowBatch getVectorizedRowBatch(int[] inputs, int size) { + VectorizedRowBatch batch = new VectorizedRowBatch(2, size); + DateColumnVector lcv = new DateColumnVector(size); + for (int i = 0; i < size; i++) { + lcv.vector[i] = inputs[i % inputs.length]; + } + batch.cols[0] = lcv; + batch.cols[1] = new LongColumnVector(size); + batch.size = size; + return batch; + } + + private void compareToUDFYearDate(long t, int y) { + UDFYear udf = new UDFYear(); + TimestampWritable tsw = toTimestampWritable(t); + IntWritable res = udf.evaluate(tsw); + Assert.assertEquals(res.get(), y); + } + + private void verifyUDFYear(VectorizedRowBatch batch) { + VectorExpression udf = null; + udf = new VectorUDFYearDate(0, 1); + udf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + if (!batch.cols[in].noNulls) { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) 
batch.cols[out]).vector[i]; + compareToUDFYearDate(t, (int) y); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFYear() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFYear(batch); + + int[] boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFYear(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFYear(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFYear(batch); + + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFYear(batch); + } + + private void compareToUDFDayOfMonthDate(long t, int y) { + UDFDayOfMonth udf = new UDFDayOfMonth(); + TimestampWritable tsw = toTimestampWritable(t); + IntWritable res = udf.evaluate(tsw); + Assert.assertEquals(res.get(), y); + } + + private void verifyUDFDayOfMonth(VectorizedRowBatch batch) { + VectorExpression udf = null; + udf = new VectorUDFDayOfMonthDate(0, 1); + udf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + if (!batch.cols[in].noNulls) { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) batch.cols[out]).vector[i]; + compareToUDFDayOfMonthDate(t, (int) y); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFDayOfMonth() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFDayOfMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFDayOfMonth(batch); + + int[] boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFDayOfMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFDayOfMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFDayOfMonth(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFDayOfMonth(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFDayOfMonth(batch); + + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFDayOfMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFDayOfMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFDayOfMonth(batch); + } + + private void compareToUDFMonthDate(long t, int y) { + UDFMonth udf = new UDFMonth(); + TimestampWritable tsw = 
toTimestampWritable(t); + IntWritable res = udf.evaluate(tsw); + Assert.assertEquals(res.get(), y); + } + + private void verifyUDFMonth(VectorizedRowBatch batch) { + VectorExpression udf; + udf = new VectorUDFMonthDate(0, 1); + udf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + if (!batch.cols[in].noNulls) { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) batch.cols[out]).vector[i]; + compareToUDFMonthDate(t, (int) y); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFMonth() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFMonth(batch); + + int[] boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFMonth(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFMonth(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFMonth(batch); + + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFMonth(batch); + } + + private LongWritable getLongWritable(TimestampWritable i) { + LongWritable result = new LongWritable(); + if (i == null) { + return null; + } else { + result.set(i.getSeconds()); + return result; + } + } + + private void compareToUDFUnixTimeStampDate(long t, long y) { + TimestampWritable tsw = toTimestampWritable(t); + LongWritable res = getLongWritable(tsw); + if(res.get() != y) { + System.out.printf("%d vs %d for %d, %d\n", res.get(), y, t, + tsw.getTimestamp().getTime()/1000); + } + + Assert.assertEquals(res.get(), y); + } + + private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch) { + VectorExpression udf; + udf = new VectorUDFUnixTimeStampDate(0, 1); + udf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + if (!batch.cols[out].noNulls) { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) batch.cols[out]).vector[i]; + compareToUDFUnixTimeStampDate(t, y); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFUnixTimeStamp() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFUnixTimeStamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFUnixTimeStamp(batch); + + int[] 
boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFUnixTimeStamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFUnixTimeStamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFUnixTimeStamp(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFUnixTimeStamp(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFUnixTimeStamp(batch); + + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFUnixTimeStamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFUnixTimeStamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFUnixTimeStamp(batch); + } + + private void compareToUDFWeekOfYearDate(long t, int y) { + UDFWeekOfYear udf = new UDFWeekOfYear(); + TimestampWritable tsw = toTimestampWritable(t); + IntWritable res = udf.evaluate(tsw); + Assert.assertEquals(res.get(), y); + } + + private void verifyUDFWeekOfYear(VectorizedRowBatch batch) { + VectorExpression udf; + udf = new VectorUDFWeekOfYearDate(0, 1); + udf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) batch.cols[out]).vector[i]; + compareToUDFWeekOfYearDate(t, (int) y); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFWeekOfYear() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFWeekOfYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFWeekOfYear(batch); + + int[] boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFWeekOfYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFWeekOfYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFWeekOfYear(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFWeekOfYear(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFWeekOfYear(batch); + + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFWeekOfYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFWeekOfYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFWeekOfYear(batch); + } + + private void verifyUDFHour(VectorizedRowBatch batch) { + VectorExpression udf; + udf = new VectorUDFZeroFieldDate(0, 1); + udf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) batch.cols[out]).vector[i]; + Assert.assertEquals(0, y); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFHour() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + 
Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFHour(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFHour(batch); + + int[] boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFHour(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFHour(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFHour(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFHour(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFHour(batch); + + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFHour(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFHour(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFHour(batch); + } + + public static void main(String[] args) { + TestVectorDateExpressions self = new TestVectorDateExpressions(); + self.testVectorUDFYear(); + self.testVectorUDFMonth(); + self.testVectorUDFDayOfMonth(); + self.testVectorUDFHour(); + self.testVectorUDFWeekOfYear(); + self.testVectorUDFUnixTimeStamp(); + } +}
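The adaptor test that follows drives a legacy row-mode UDF over whole column vectors. Schematically, the bridge amounts to the loop below; this is a simplified sketch, not the actual VectorUDFAdaptor code (which goes through ObjectInspectors and VectorUDFArgDesc). DateColumnVector, DateWritable, and the DateUDF test class are the ones from this patch.

import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.udf.legacy.DateUDF;
import org.apache.hadoop.hive.serde2.io.DateWritable;

public class RowUdfBridgeSketch {
  // Applies a row-mode evaluate(DateWritable) to every row of a DATE column.
  // isRepeating and selected-row handling are omitted for brevity.
  static void apply(DateUDF udf, DateColumnVector in, DateColumnVector out, int n) {
    DateWritable dw = new DateWritable();
    for (int i = 0; i < n; i++) {
      if (!in.noNulls && in.isNull[i]) {
        out.noNulls = false;
        out.isNull[i] = true;
        continue;
      }
      dw.set(in.vector[i]);
      out.vector[i] = udf.evaluate(dw).getDays();
    }
  }
}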
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java index a7567b7..d1b3d89 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/TestVectorUDFAdaptor.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -31,6 +32,7 @@ import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.exec.vector.udf.generic.GenericUDFIsNull; import org.apache.hadoop.hive.ql.exec.vector.udf.legacy.ConcatTextLongDoubleUDF; +import org.apache.hadoop.hive.ql.exec.vector.udf.legacy.DateUDF; import org.apache.hadoop.hive.ql.exec.vector.udf.legacy.LongUDF; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -126,6 +128,69 @@ public void testLongUDF() { } @Test + public void testDateUdf() { + + // create a syntax tree for a simple function call "dateudf(col0)" + ExprNodeGenericFuncDesc funcDesc; + TypeInfo typeInfo = TypeInfoFactory.dateTypeInfo; + GenericUDFBridge genericUDFBridge = new GenericUDFBridge("dateudf", false, + DateUDF.class.getName()); + List children = new ArrayList(); + ExprNodeColumnDesc colDesc + = new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false); + children.add(colDesc); + VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1]; + argDescs[0] = new VectorUDFArgDesc(); + argDescs[0].setVariable(0); + funcDesc = new ExprNodeGenericFuncDesc(typeInfo, genericUDFBridge, + genericUDFBridge.getUdfName(), children); + + // create the adaptor for this function call to work in vector mode + VectorUDFAdaptor vudf = null; + try { + vudf = new VectorUDFAdaptor(funcDesc, 1, "Date", argDescs); + } catch (HiveException e) { + + // We should never get here. + assertTrue(false); + } + + VectorizedRowBatch b = getBatchDateInDateOut(); + vudf.evaluate(b); + + // verify output + DateColumnVector out = (DateColumnVector) b.cols[1]; + assertEquals(1000, out.vector[0]); + assertEquals(1001, out.vector[1]); + assertEquals(1002, out.vector[2]); + assertTrue(out.noNulls); + assertFalse(out.isRepeating); + + // with nulls + b = getBatchDateInDateOut(); + out = (DateColumnVector) b.cols[1]; + b.cols[0].noNulls = false; + vudf.evaluate(b); + assertFalse(out.noNulls); + assertEquals(1000, out.vector[0]); + assertEquals(1001, out.vector[1]); + assertTrue(out.isNull[2]); + assertFalse(out.isRepeating); + + // with repeating + b = getBatchDateInDateOut(); + out = (DateColumnVector) b.cols[1]; + b.cols[0].isRepeating = true; + vudf.evaluate(b); + + // The implementation may or may not set isRepeating on the output. + // That is implementation-defined. + assertTrue(b.cols[1].isRepeating && out.vector[0] == 1000 + || !b.cols[1].isRepeating && out.vector[2] == 1000); + assertEquals(3, b.size); + } + + @Test public void testMultiArgumentUDF() { // create a syntax tree for a function call "testudf(col0, col1, col2)" @@ -214,6 +279,21 @@ private VectorizedRowBatch getBatchLongInLongOut() { return b; } + private VectorizedRowBatch getBatchDateInDateOut() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + DateColumnVector in = new DateColumnVector(); + DateColumnVector out = new DateColumnVector(); + b.cols[0] = in; + b.cols[1] = out; + in.vector[0] = 0; + in.vector[1] = 1; + in.vector[2] = 2; + in.isNull[2] = true; + in.noNulls = true; + b.size = 3; + return b; + } + private VectorizedRowBatch getBatchStrDblLongWithStrOut() { VectorizedRowBatch b = new VectorizedRowBatch(4); BytesColumnVector strCol = new BytesColumnVector();
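One convention worth spelling out, as exercised by getBatchDateInDateOut() above: a ColumnVector's isNull array only has meaning while noNulls is false. The batch is built with isNull[2] = true but noNulls = true, so row 2 only becomes null in the "with nulls" scenario once the test flips noNulls to false. A tiny sketch of the rule (illustrative, not library code):

public class NullConventionSketch {
  // The effective null test used throughout the vectorized code paths:
  // isNull[i] is consulted only when noNulls is false.
  static boolean isNullAt(boolean noNulls, boolean[] isNull, int i) {
    return !noNulls && isNull[i];
  }

  public static void main(String[] args) {
    boolean[] isNull = {false, false, true};
    System.out.println(isNullAt(true, isNull, 2));  // false: noNulls wins
    System.out.println(isNullAt(false, isNull, 2)); // true
  }
}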
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/DateUDF.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/DateUDF.java new file mode 100644 index 0000000..7446ff8 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/udf/legacy/DateUDF.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.udf.legacy; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +@Description( + name = "dateudf", + value = "_FUNC_(d) - returns d + 1000 days", + extended = "Example:\n" + + " > SELECT dateudf(CAST('2000-01-01' AS DATE)) FROM src LIMIT 1;\n" + + " 2002-09-27" +) +public class DateUDF extends UDF { + public DateWritable evaluate(DateWritable d) { + if (d == null) { + return null; + } + return new DateWritable(d.getDays() + 1000); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java index 23d89df..aca1711 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java @@ -166,7 +166,7 @@ private void checkVectorizedReader() throws Exception { // Dates are stored as long, so convert and compare if (a instanceof Date) { Date adt = (Date) a; - Assert.assertEquals(adt.getTime(), DateWritable.daysToMillis((int) ((LongWritable) b).get())); + Assert.assertEquals(adt.getTime(), DateWritable.daysToMillis(((DateWritable) b).getDays())); continue; } diff --git ql/src/test/queries/clientpositive/vectorized_date_funcs.q ql/src/test/queries/clientpositive/vectorized_date_funcs.q new file mode 100644 index 0000000..5779986 --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_date_funcs.q @@ -0,0 +1,74 @@ +SET hive.vectorized.execution.enabled = true; + +-- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. + +CREATE TABLE date_udf_flight ( + origin_city_name STRING, + dest_city_name STRING, + fl_date DATE, + arr_delay FLOAT, + fl_num INT +); +LOAD DATA LOCAL INPATH '../../data/files/flights_tiny.txt.1' OVERWRITE INTO TABLE date_udf_flight; + +CREATE TABLE date_udf_flight_orc ( + fl_date DATE, + fl_time TIMESTAMP +) STORED AS ORC; + +INSERT INTO TABLE date_udf_flight_orc SELECT fl_date, to_utc_timestamp(fl_date, 'America/Los_Angeles') FROM date_udf_flight; + +SELECT * FROM date_udf_flight_orc; + +EXPLAIN SELECT + to_unix_timestamp(fl_time), + year(fl_time), + month(fl_time), + day(fl_time), + dayofmonth(fl_time), + weekofyear(fl_time) +FROM date_udf_flight_orc; + +SELECT + to_unix_timestamp(fl_time), + year(fl_time), + month(fl_time), + day(fl_time), + dayofmonth(fl_time), + weekofyear(fl_time) +FROM date_udf_flight_orc; + +EXPLAIN SELECT + to_unix_timestamp(fl_date), + year(fl_date), + month(fl_date), + day(fl_date), + dayofmonth(fl_date), + weekofyear(fl_date) +FROM date_udf_flight_orc; + +SELECT + to_unix_timestamp(fl_date), + year(fl_date), + month(fl_date), + day(fl_date), + dayofmonth(fl_date), + weekofyear(fl_date) +FROM date_udf_flight_orc; + +EXPLAIN SELECT + year(fl_time) = year(fl_date), + month(fl_time) = month(fl_date), + day(fl_time) = day(fl_date), + dayofmonth(fl_time) = dayofmonth(fl_date), + weekofyear(fl_time) = weekofyear(fl_date) +FROM date_udf_flight_orc; + +-- Should all be true or NULL +SELECT + year(fl_time) = year(fl_date), + month(fl_time) = month(fl_date), + day(fl_time) = day(fl_date), + dayofmonth(fl_time) = dayofmonth(fl_date), + weekofyear(fl_time) = weekofyear(fl_date) +FROM date_udf_flight_orc; diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out new file mode 100644 index 0000000..c719f84 --- /dev/null +++ 
ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -0,0 +1,820 @@ +PREHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. + +CREATE TABLE date_udf_flight ( + origin_city_name STRING, + dest_city_name STRING, + fl_date DATE, + arr_delay FLOAT, + fl_num INT +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. + +CREATE TABLE date_udf_flight ( + origin_city_name STRING, + dest_city_name STRING, + fl_date DATE, + arr_delay FLOAT, + fl_num INT +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@date_udf_flight +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/flights_tiny.txt.1' OVERWRITE INTO TABLE date_udf_flight +PREHOOK: type: LOAD +PREHOOK: Output: default@date_udf_flight +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/flights_tiny.txt.1' OVERWRITE INTO TABLE date_udf_flight +POSTHOOK: type: LOAD +POSTHOOK: Output: default@date_udf_flight +PREHOOK: query: CREATE TABLE date_udf_flight_orc ( + fl_date DATE, + fl_time TIMESTAMP +) STORED AS ORC +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE date_udf_flight_orc ( + fl_date DATE, + fl_time TIMESTAMP +) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@date_udf_flight_orc +PREHOOK: query: INSERT INTO TABLE date_udf_flight_orc SELECT fl_date, to_utc_timestamp(fl_date, 'America/Los_Angeles') FROM date_udf_flight +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight +PREHOOK: Output: default@date_udf_flight_orc +POSTHOOK: query: INSERT INTO TABLE date_udf_flight_orc SELECT fl_date, to_utc_timestamp(fl_date, 'America/Los_Angeles') FROM date_udf_flight +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight +POSTHOOK: Output: default@date_udf_flight_orc +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +PREHOOK: query: SELECT * FROM date_udf_flight_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 
2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-31 
2010-10-31 07:00:00 +PREHOOK: query: EXPLAIN SELECT + to_unix_timestamp(fl_time), + year(fl_time), + month(fl_time), + day(fl_time), + dayofmonth(fl_time), + weekofyear(fl_time) +FROM date_udf_flight_orc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + to_unix_timestamp(fl_time), + year(fl_time), + month(fl_time), + day(fl_time), + dayofmonth(fl_time), + weekofyear(fl_time) +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), weekofyear(fl_time) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: SELECT + to_unix_timestamp(fl_time), + year(fl_time), + month(fl_time), + day(fl_time), + dayofmonth(fl_time), + weekofyear(fl_time) +FROM date_udf_flight_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + to_unix_timestamp(fl_time), + year(fl_time), + month(fl_time), + day(fl_time), + dayofmonth(fl_time), + weekofyear(fl_time) +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +1287583200 2010 10 20 20 42 +1287583200 2010 10 20 20 42 +1287583200 2010 10 20 20 42 +1287583200 2010 10 20 20 42 +1287583200 2010 10 20 20 42 +1287583200 2010 10 20 20 42 +1287583200 2010 10 20 20 42 +1287583200 2010 10 20 20 42 +1287669600 2010 10 21 21 42 +1287669600 2010 10 21 21 42 +1287669600 2010 10 21 21 42 +1287669600 2010 10 21 21 42 +1287669600 2010 10 21 21 42 +1287669600 2010 10 21 21 42 +1287669600 2010 10 21 21 42 +1287669600 2010 10 21 21 42 +1287756000 2010 10 22 22 42 +1287756000 2010 10 22 22 42 +1287756000 2010 10 22 22 42 +1287756000 2010 10 22 22 42 +1287756000 2010 10 22 22 42 +1287756000 2010 10 22 22 42 +1287756000 2010 10 22 22 42 +1287756000 2010 10 22 22 42 +1287842400 2010 10 23 23 42 +1287842400 2010 10 23 23 42 +1287842400 2010 10 23 23 42 +1287842400 2010 10 23 23 42 +1287842400 2010 10 23 23 42 +1287842400 2010 10 23 23 42 +1287842400 2010 10 23 23 42 +1287928800 2010 10 24 24 42 
+1287928800 2010 10 24 24 42 +1287928800 2010 10 24 24 42 +1287928800 2010 10 24 24 42 +1287928800 2010 10 24 24 42 +1287928800 2010 10 24 24 42 +1287928800 2010 10 24 24 42 +1288015200 2010 10 25 25 43 +1288015200 2010 10 25 25 43 +1288015200 2010 10 25 25 43 +1288015200 2010 10 25 25 43 +1288015200 2010 10 25 25 43 +1288015200 2010 10 25 25 43 +1288015200 2010 10 25 25 43 +1288015200 2010 10 25 25 43 +1288101600 2010 10 26 26 43 +1288101600 2010 10 26 26 43 +1288101600 2010 10 26 26 43 +1288101600 2010 10 26 26 43 +1288101600 2010 10 26 26 43 +1288101600 2010 10 26 26 43 +1288101600 2010 10 26 26 43 +1288101600 2010 10 26 26 43 +1288188000 2010 10 27 27 43 +1288188000 2010 10 27 27 43 +1288188000 2010 10 27 27 43 +1288188000 2010 10 27 27 43 +1288188000 2010 10 27 27 43 +1288188000 2010 10 27 27 43 +1288188000 2010 10 27 27 43 +1288188000 2010 10 27 27 43 +1288274400 2010 10 28 28 43 +1288274400 2010 10 28 28 43 +1288274400 2010 10 28 28 43 +1288274400 2010 10 28 28 43 +1288274400 2010 10 28 28 43 +1288274400 2010 10 28 28 43 +1288274400 2010 10 28 28 43 +1288274400 2010 10 28 28 43 +1288360800 2010 10 29 29 43 +1288360800 2010 10 29 29 43 +1288360800 2010 10 29 29 43 +1288360800 2010 10 29 29 43 +1288360800 2010 10 29 29 43 +1288360800 2010 10 29 29 43 +1288360800 2010 10 29 29 43 +1288360800 2010 10 29 29 43 +1288447200 2010 10 30 30 43 +1288447200 2010 10 30 30 43 +1288447200 2010 10 30 30 43 +1288447200 2010 10 30 30 43 +1288447200 2010 10 30 30 43 +1288447200 2010 10 30 30 43 +1288447200 2010 10 30 30 43 +1288533600 2010 10 31 31 43 +1288533600 2010 10 31 31 43 +1288533600 2010 10 31 31 43 +1288533600 2010 10 31 31 43 +1288533600 2010 10 31 31 43 +1288533600 2010 10 31 31 43 +1288533600 2010 10 31 31 43 +1288447200 2010 10 30 30 43 +1288447200 2010 10 30 30 43 +1288360800 2010 10 29 29 43 +1288360800 2010 10 29 29 43 +1288360800 2010 10 29 29 43 +1288274400 2010 10 28 28 43 +1288274400 2010 10 28 28 43 +1288274400 2010 10 28 28 43 +1288188000 2010 10 27 27 43 +1288188000 2010 10 27 27 43 +1288101600 2010 10 26 26 43 +1288101600 2010 10 26 26 43 +1288101600 2010 10 26 26 43 +1288101600 2010 10 26 26 43 +1288015200 2010 10 25 25 43 +1288015200 2010 10 25 25 43 +1288015200 2010 10 25 25 43 +1287928800 2010 10 24 24 42 +1287928800 2010 10 24 24 42 +1287928800 2010 10 24 24 42 +1287928800 2010 10 24 24 42 +1287842400 2010 10 23 23 42 +1287756000 2010 10 22 22 42 +1287756000 2010 10 22 22 42 +1287756000 2010 10 22 22 42 +1287669600 2010 10 21 21 42 +1287669600 2010 10 21 21 42 +1287669600 2010 10 21 21 42 +1287583200 2010 10 20 20 42 +1287583200 2010 10 20 20 42 +1287842400 2010 10 23 23 42 +1287842400 2010 10 23 23 42 +1287842400 2010 10 23 23 42 +1288447200 2010 10 30 30 43 +1288447200 2010 10 30 30 43 +1287583200 2010 10 20 20 42 +1287669600 2010 10 21 21 42 +1287842400 2010 10 23 23 42 +1287928800 2010 10 24 24 42 +1288015200 2010 10 25 25 43 +1288101600 2010 10 26 26 43 +1288188000 2010 10 27 27 43 +1288274400 2010 10 28 28 43 +1288360800 2010 10 29 29 43 +1288533600 2010 10 31 31 43 +PREHOOK: query: EXPLAIN SELECT + to_unix_timestamp(fl_date), + year(fl_date), + month(fl_date), + day(fl_date), + dayofmonth(fl_date), + weekofyear(fl_date) +FROM date_udf_flight_orc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + to_unix_timestamp(fl_date), + year(fl_date), + month(fl_date), + day(fl_date), + dayofmonth(fl_date), + weekofyear(fl_date) +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE 
[(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), weekofyear(fl_date) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: SELECT + to_unix_timestamp(fl_date), + year(fl_date), + month(fl_date), + day(fl_date), + dayofmonth(fl_date), + weekofyear(fl_date) +FROM date_udf_flight_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + to_unix_timestamp(fl_date), + year(fl_date), + month(fl_date), + day(fl_date), + dayofmonth(fl_date), + weekofyear(fl_date) +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +1287558000 2010 10 20 20 42 +1287558000 2010 10 20 20 42 +1287558000 2010 10 20 20 42 +1287558000 2010 10 20 20 42 +1287558000 2010 10 20 20 42 +1287558000 2010 10 20 20 42 +1287558000 2010 10 20 20 42 +1287558000 2010 10 20 20 42 +1287644400 2010 10 21 21 42 +1287644400 2010 10 21 21 42 +1287644400 2010 10 21 21 42 +1287644400 2010 10 21 21 42 +1287644400 2010 10 21 21 42 +1287644400 2010 10 21 21 42 +1287644400 2010 10 21 21 42 +1287644400 2010 10 21 21 42 +1287730800 2010 10 22 22 42 +1287730800 2010 10 22 22 42 +1287730800 2010 10 22 22 42 +1287730800 2010 10 22 22 42 +1287730800 2010 10 22 22 42 +1287730800 2010 10 22 22 42 +1287730800 2010 10 22 22 42 +1287730800 2010 10 22 22 42 +1287817200 2010 10 23 23 42 +1287817200 2010 10 23 23 42 +1287817200 2010 10 23 23 42 +1287817200 2010 10 23 23 42 +1287817200 2010 10 23 23 42 +1287817200 2010 10 23 23 42 +1287817200 2010 10 23 23 42 +1287903600 2010 10 24 24 42 +1287903600 2010 10 24 24 42 +1287903600 2010 10 24 24 42 +1287903600 2010 10 24 24 42 +1287903600 2010 10 24 24 42 +1287903600 2010 10 24 24 42 +1287903600 2010 10 24 24 42 +1287990000 2010 10 25 25 43 +1287990000 2010 10 25 25 43 +1287990000 2010 10 25 25 43 +1287990000 2010 10 25 25 43 +1287990000 2010 10 25 25 43 +1287990000 2010 10 25 25 43 +1287990000 2010 10 25 25 43 +1287990000 2010 10 25 25 43 +1288076400 2010 10 26 26 43 +1288076400 2010 10 26 26 43 +1288076400 2010 10 26 26 
43 +1288076400 2010 10 26 26 43 +1288076400 2010 10 26 26 43 +1288076400 2010 10 26 26 43 +1288076400 2010 10 26 26 43 +1288076400 2010 10 26 26 43 +1288162800 2010 10 27 27 43 +1288162800 2010 10 27 27 43 +1288162800 2010 10 27 27 43 +1288162800 2010 10 27 27 43 +1288162800 2010 10 27 27 43 +1288162800 2010 10 27 27 43 +1288162800 2010 10 27 27 43 +1288162800 2010 10 27 27 43 +1288249200 2010 10 28 28 43 +1288249200 2010 10 28 28 43 +1288249200 2010 10 28 28 43 +1288249200 2010 10 28 28 43 +1288249200 2010 10 28 28 43 +1288249200 2010 10 28 28 43 +1288249200 2010 10 28 28 43 +1288249200 2010 10 28 28 43 +1288335600 2010 10 29 29 43 +1288335600 2010 10 29 29 43 +1288335600 2010 10 29 29 43 +1288335600 2010 10 29 29 43 +1288335600 2010 10 29 29 43 +1288335600 2010 10 29 29 43 +1288335600 2010 10 29 29 43 +1288335600 2010 10 29 29 43 +1288422000 2010 10 30 30 43 +1288422000 2010 10 30 30 43 +1288422000 2010 10 30 30 43 +1288422000 2010 10 30 30 43 +1288422000 2010 10 30 30 43 +1288422000 2010 10 30 30 43 +1288422000 2010 10 30 30 43 +1288508400 2010 10 31 31 43 +1288508400 2010 10 31 31 43 +1288508400 2010 10 31 31 43 +1288508400 2010 10 31 31 43 +1288508400 2010 10 31 31 43 +1288508400 2010 10 31 31 43 +1288508400 2010 10 31 31 43 +1288422000 2010 10 30 30 43 +1288422000 2010 10 30 30 43 +1288335600 2010 10 29 29 43 +1288335600 2010 10 29 29 43 +1288335600 2010 10 29 29 43 +1288249200 2010 10 28 28 43 +1288249200 2010 10 28 28 43 +1288249200 2010 10 28 28 43 +1288162800 2010 10 27 27 43 +1288162800 2010 10 27 27 43 +1288076400 2010 10 26 26 43 +1288076400 2010 10 26 26 43 +1288076400 2010 10 26 26 43 +1288076400 2010 10 26 26 43 +1287990000 2010 10 25 25 43 +1287990000 2010 10 25 25 43 +1287990000 2010 10 25 25 43 +1287903600 2010 10 24 24 42 +1287903600 2010 10 24 24 42 +1287903600 2010 10 24 24 42 +1287903600 2010 10 24 24 42 +1287817200 2010 10 23 23 42 +1287730800 2010 10 22 22 42 +1287730800 2010 10 22 22 42 +1287730800 2010 10 22 22 42 +1287644400 2010 10 21 21 42 +1287644400 2010 10 21 21 42 +1287644400 2010 10 21 21 42 +1287558000 2010 10 20 20 42 +1287558000 2010 10 20 20 42 +1287817200 2010 10 23 23 42 +1287817200 2010 10 23 23 42 +1287817200 2010 10 23 23 42 +1288422000 2010 10 30 30 43 +1288422000 2010 10 30 30 43 +1287558000 2010 10 20 20 42 +1287644400 2010 10 21 21 42 +1287817200 2010 10 23 23 42 +1287903600 2010 10 24 24 42 +1287990000 2010 10 25 25 43 +1288076400 2010 10 26 26 43 +1288162800 2010 10 27 27 43 +1288249200 2010 10 28 28 43 +1288335600 2010 10 29 29 43 +1288508400 2010 10 31 31 43 +PREHOOK: query: EXPLAIN SELECT + year(fl_time) = year(fl_date), + month(fl_time) = month(fl_date), + day(fl_time) = day(fl_date), + dayofmonth(fl_time) = dayofmonth(fl_date), + weekofyear(fl_time) = weekofyear(fl_date) +FROM date_udf_flight_orc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + year(fl_time) = year(fl_date), + month(fl_time) = month(fl_date), + day(fl_time) = day(fl_date), + dayofmonth(fl_time) = dayofmonth(fl_date), + weekofyear(fl_time) = weekofyear(fl_date) +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: 
date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: -- Should all be true or NULL +SELECT + year(fl_time) = year(fl_date), + month(fl_time) = month(fl_date), + day(fl_time) = day(fl_date), + dayofmonth(fl_time) = dayofmonth(fl_date), + weekofyear(fl_time) = weekofyear(fl_date) +FROM date_udf_flight_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: query: -- Should all be true or NULL +SELECT + year(fl_time) = year(fl_date), + month(fl_time) = month(fl_date), + day(fl_time) = day(fl_date), + dayofmonth(fl_time) = dayofmonth(fl_date), + weekofyear(fl_time) = weekofyear(fl_date) +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true 
true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true +true true true true true
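As a closing sanity check on the two result blocks above (an illustrative calculation, not part of the patch): for the same row, to_unix_timestamp(fl_date) yields 1287558000 while to_unix_timestamp(fl_time) yields 1287583200. Both are consistent with the tests running in a US Pacific default time zone: the DATE is evaluated at local midnight, while fl_time was built as to_utc_timestamp(fl_date, 'America/Los_Angeles') and therefore reads 07:00 local, so the two differ by exactly the seven-hour PDT offset.

public class GoldenOutputCheck {
  public static void main(String[] args) {
    long fromDate = 1287558000L; // to_unix_timestamp(fl_date) for 2010-10-20
    long fromTime = 1287583200L; // to_unix_timestamp(fl_time) for the same row
    System.out.println((fromTime - fromDate) == 7 * 3600); // prints true
  }
}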