diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java index bbd734c..d5cce95 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java @@ -20,6 +20,8 @@ import java.util.Calendar; +import static org.apache.hadoop.hive.ql.exec.vector.DateUtils.getDayOfMonth; + /** * Expression to get day of month. * Extends {@link VectorUDFTimestampFieldLong} @@ -32,6 +34,11 @@ public VectorUDFDayOfMonthLong(int colNum, int outputColumn) { super(Calendar.DAY_OF_MONTH, colNum, outputColumn); } + @Override + protected long getFieldWithDays(long daysSinceEpoch) { + return getDayOfMonth(daysSinceEpoch); + } + public VectorUDFDayOfMonthLong() { super(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java index 21552e1..9b61c75 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java @@ -20,6 +20,8 @@ import java.util.Calendar; +import static org.apache.hadoop.hive.ql.exec.vector.DateUtils.getMonth; + /** * Returns month value. 
* Extends {@link VectorUDFTimestampFieldLong} @@ -42,4 +44,8 @@ protected long getField(long time) { return 1 + super.getField(time); } + @Override + protected long getFieldWithDays(long daysSinceEpoch) { + return getMonth(daysSinceEpoch); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java index f840f09..b9ecd65 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java @@ -21,6 +21,7 @@ import java.sql.Timestamp; import java.util.Calendar; +import org.apache.hadoop.hive.ql.exec.vector.DateUtils; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -80,6 +81,11 @@ protected long getField(long time) { return calendar.get(field); } + protected long getFieldWithDays(long daysSinceEpoch) { + calendar.setTimeInMillis(daysSinceEpoch * DateUtils.DAY_IN_MILLIS); + return calendar.get(field); + } + @Override public void evaluate(VectorizedRowBatch batch) { LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; @@ -96,35 +102,71 @@ public void evaluate(VectorizedRowBatch batch) { /* true for all algebraic UDFs with no state */ outV.isRepeating = inputCol.isRepeating; - if (inputCol.noNulls) { - outV.noNulls = true; - if (batch.selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.vector[i] = getField(inputCol.vector[i]); + if (DateUtils.isDateVector(inputCol)) { + if (inputCol.noNulls) { + outV.noNulls = true; + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + outV.vector[i] = getFieldWithDays(inputCol.vector[i]); + } + } else { + for (int i = 0; i < n; i++) { + 
outV.vector[i] = getFieldWithDays(inputCol.vector[i]); + } } } else { - for(int i = 0; i < n; i++) { - outV.vector[i] = getField(inputCol.vector[i]); + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. + outV.noNulls = false; + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = getFieldWithDays(inputCol.vector[i]); + } + } + } else { + for (int i = 0; i < n; i++) { + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = getFieldWithDays(inputCol.vector[i]); + } + } } } } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. - outV.noNulls = false; - if (batch.selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { + if (inputCol.noNulls) { + outV.noNulls = true; + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + outV.vector[i] = getField(inputCol.vector[i]); + } + } else { + for (int i = 0; i < n; i++) { outV.vector[i] = getField(inputCol.vector[i]); } } } else { - for(int i = 0; i < n; i++) { - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getField(inputCol.vector[i]); + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. 
+ outV.noNulls = false; + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = getField(inputCol.vector[i]); + } + } + } else { + for (int i = 0; i < n; i++) { + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = getField(inputCol.vector[i]); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java index 0feed95..e3b9af6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java @@ -28,13 +28,18 @@ @Override protected long getField(long time) { - long ms = (time / (1000*1000*1000)) * 1000; - long remainder = time % (1000*1000*1000); + long ms = (time / (1000 * 1000 * 1000)) * 1000; + long remainder = time % (1000 * 1000 * 1000); /* negative timestamps need to be adjusted */ - if(remainder < 0) { + if (remainder < 0) { ms -= 1000; } - return ms/1000; + return ms / 1000; + } + + @Override + protected long getFieldWithDays(long daysSinceEpoch) { + return daysSinceEpoch * 24 * 60 * 60; } public VectorUDFUnixTimeStampLong(int colNum, int outputColumn) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java index abd87fe..cb9c90c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java @@ -21,6 +21,8 @@ import java.util.Arrays; import java.util.Calendar; +import static org.apache.hadoop.hive.ql.exec.vector.DateUtils.getYear; + /** * Expression to get year as a long. 
* Extends {@link VectorUDFTimestampFieldLong} @@ -57,6 +59,11 @@ protected long getField(long time) { } } + @Override + protected long getFieldWithDays(long daysSinceEpoch) { + return getYear(daysSinceEpoch); + } + public VectorUDFYearLong(int colNum, int outputColumn) { super(Calendar.YEAR, colNum, outputColumn); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 173fb8d..95f166b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateUtils; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -1018,6 +1019,35 @@ Object next(Object previous) throws IOException { } @Override + Object nextVector(Object previousVector, long batchSize) throws IOException { + LongColumnVector result = null; + if (previousVector == null) { + result = new LongColumnVector(); + } else { + result = (LongColumnVector) previousVector; + } + + // Read present/isNull stream + super.nextVector(result, batchSize); + + reader.nextVector(result, batchSize); + + if (result.isRepeating) { + batchSize = 1; + } + + // Non-repeating values are present in the vector. Iterate through the vector and populate the time + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + result.vector[i] = result.vector[result.isRepeating ?
0 : i]; + } + } + DateUtils.setDateVector(result); + + return result; + } + + @Override void skipRows(long items) throws IOException { reader.skip(countNonNulls(items)); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java index 20add85..7db793d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java @@ -22,6 +22,7 @@ import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; +import java.util.GregorianCalendar; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; @@ -46,11 +47,12 @@ @VectorizedExpressions({VectorUDFDayOfMonthLong.class, VectorUDFDayOfMonthString.class}) public class UDFDayOfMonth extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private final Calendar calendar = Calendar.getInstance(); + private final GregorianCalendar calendar = new GregorianCalendar(); private final IntWritable result = new IntWritable(); public UDFDayOfMonth() { + calendar.setGregorianChange(new Date(Long.MIN_VALUE)); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java index 4e34dbf..5e1e913 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java @@ -22,6 +22,7 @@ import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; +import java.util.GregorianCalendar; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; @@ -44,11 +45,12 @@ @VectorizedExpressions({VectorUDFMonthLong.class, VectorUDFMonthString.class}) public class UDFMonth extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private final Calendar calendar = Calendar.getInstance(); + private final 
GregorianCalendar calendar = new GregorianCalendar(); private IntWritable result = new IntWritable(); public UDFMonth() { + calendar.setGregorianChange(new Date(Long.MIN_VALUE)); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java index 1853860..841aa6b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java @@ -22,6 +22,7 @@ import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; +import java.util.GregorianCalendar; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; @@ -46,11 +47,12 @@ @VectorizedExpressions({VectorUDFYearLong.class, VectorUDFYearString.class}) public class UDFYear extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private final Calendar calendar = Calendar.getInstance(); + private final GregorianCalendar calendar = new GregorianCalendar(); private final IntWritable result = new IntWritable(); public UDFYear() { + calendar.setGregorianChange(new Date(Long.MIN_VALUE)); } /** @@ -71,7 +73,11 @@ public IntWritable evaluate(Text dateString) { try { Date date = formatter.parse(dateString.toString()); calendar.setTime(date); - result.set(calendar.get(Calendar.YEAR)); + if (calendar.get(Calendar.ERA) == GregorianCalendar.AD) { + result.set(calendar.get(Calendar.YEAR)); + } else { + result.set(-calendar.get(Calendar.YEAR) + 1); + } return result; } catch (ParseException e) { return null; @@ -84,7 +90,11 @@ public IntWritable evaluate(DateWritable d) { } calendar.setTime(d.get()); - result.set(calendar.get(Calendar.YEAR)); + if (calendar.get(Calendar.ERA) == GregorianCalendar.AD) { + result.set(calendar.get(Calendar.YEAR)); + } else { + result.set(-calendar.get(Calendar.YEAR) + 1); + } return result; } @@ -94,7 +104,11 @@ public IntWritable evaluate(TimestampWritable t) { } 
calendar.setTime(t.getTimestamp()); - result.set(calendar.get(Calendar.YEAR)); + if (calendar.get(Calendar.ERA) == GregorianCalendar.AD) { + result.set(calendar.get(Calendar.YEAR)); + } else { + result.set(-calendar.get(Calendar.YEAR) + 1); + } return result; }