diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
index 9f5c793..a095141 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
@@ -26,12 +26,12 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.io.Text;
+import org.joda.time.DateTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
 
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Calendar;
-import java.util.Date;
 
 public class VectorUDFDateAddColCol extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -41,7 +41,7 @@
   private int outputColumn;
   protected boolean isPositive = true;
   private transient final Calendar calendar = Calendar.getInstance();
-  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient DateTimeFormatter jodaDTF = DateTimeFormat.forPattern("yyyy-MM-dd");
   private transient final Text text = new Text();
 
   public VectorUDFDateAddColCol(int colNum1, int colNum2, int outputColumn) {
@@ -159,8 +159,7 @@ public void evaluate(VectorizedRowBatch batch) {
     } else {
       calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] - (int) numDays));
     }
-    Date newDate = calendar.getTime();
-    text.set(formatter.format(newDate));
+    text.set(jodaDTF.print(calendar.getTimeInMillis()));
     return Arrays.copyOf(text.getBytes(), text.getLength());
   }
 
@@ -172,8 +171,7 @@ public void evaluate(VectorizedRowBatch batch) {
     } else {
       calendar.add(Calendar.DATE, (int) -numDays);
     }
-    Date newDate = calendar.getTime();
-    text.set(formatter.format(newDate));
+    text.set(jodaDTF.print(calendar.getTimeInMillis()));
     return Arrays.copyOf(text.getBytes(), text.getLength());
   }
 
@@ -185,8 +183,16 @@ protected void evaluateString(BytesColumnVector inputColumnVector1, LongColumnVe
     } else {
       text.set(inputColumnVector1.vector[i], inputColumnVector1.start[i], inputColumnVector1.length[i]);
       try {
-        calendar.setTime(formatter.parse(text.toString()));
-      } catch (ParseException e) {
+        //Consider only date part when HH:mm:ss is provided.
+        String dateString = text.toString();
+        int index = dateString.indexOf(" ");
+        if (!dateString.isEmpty() && index > 0) {
+          dateString = dateString.substring(0, index);
+        }
+
+        DateTime jodaTime = jodaDTF.parseDateTime(dateString);
+        calendar.setTimeInMillis(jodaTime.getMillis());
+      } catch (IllegalArgumentException e) {
         outputVector.noNulls = false;
         outputVector.isNull[i] = true;
       }
@@ -195,8 +201,7 @@ protected void evaluateString(BytesColumnVector inputColumnVector1, LongColumnVe
     } else {
       calendar.add(Calendar.DATE, -(int) inputColumnVector2.vector[i]);
     }
-    Date newDate = calendar.getTime();
-    text.set(formatter.format(newDate));
+    text.set(jodaDTF.print(calendar.getTimeInMillis()));
     outputVector.vector[i] = Arrays.copyOf(text.getBytes(), text.getLength());
     outputVector.start[i] = 0;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
index 6390ecd..ec65d22 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
@@ -26,12 +26,11 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.io.Text;
+import org.joda.time.DateTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
 
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.Arrays;
-import java.util.Calendar;
-import java.util.Date;
 
 public class VectorUDFDateAddColScalar extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -40,8 +39,7 @@
   private int outputColumn;
   private int numDays;
   protected boolean isPositive = true;
-  private transient final Calendar calendar = Calendar.getInstance();
-  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient DateTimeFormatter jodaDTF = DateTimeFormat.forPattern("yyyy-MM-dd");
   private transient final Text text = new Text();
 
   public VectorUDFDateAddColScalar(int colNum, long numDays, int outputColumn) {
@@ -123,18 +121,19 @@ public void evaluate(VectorizedRowBatch batch) {
         break;
 
       case TIMESTAMP:
+        DateTime dateTime = new DateTime();
        if (inputCol.noNulls) {
          outV.noNulls = true;
          if (batch.selectedInUse) {
            for(int j=0; j < n; j++) {
              int i = sel[j];
-             outV.vector[i] = evaluateTimestamp(inputCol, i);
+             outV.vector[i] = evaluateTimestamp(dateTime, inputCol, i);
              outV.start[i] = 0;
              outV.length[i] = outV.vector[i].length;
            }
          } else {
            for(int i = 0; i < n; i++) {
-             outV.vector[i] = evaluateTimestamp(inputCol, i);
+             outV.vector[i] = evaluateTimestamp(dateTime, inputCol, i);
              outV.start[i] = 0;
              outV.length[i] = outV.vector[i].length;
            }
@@ -148,7 +147,7 @@ public void evaluate(VectorizedRowBatch batch) {
              int i = sel[j];
              outV.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-               outV.vector[i] = evaluateTimestamp(inputCol, i);
+               outV.vector[i] = evaluateTimestamp(dateTime, inputCol, i);
                outV.start[i] = 0;
                outV.length[i] = outV.vector[i].length;
              }
@@ -157,7 +156,7 @@ public void evaluate(VectorizedRowBatch batch) {
            for(int i = 0; i < n; i++) {
              outV.isNull[i] = inputCol.isNull[i];
              if (!inputCol.isNull[i]) {
-               outV.vector[i] = evaluateTimestamp(inputCol, i);
+               outV.vector[i] = evaluateTimestamp(dateTime, inputCol, i);
                outV.start[i] = 0;
                outV.length[i] = outV.vector[i].length;
              }
@@ -208,28 +207,28 @@ public void evaluate(VectorizedRowBatch batch) {
     }
   }
 
-  protected byte[] evaluateTimestamp(ColumnVector columnVector, int index) {
+  protected byte[] evaluateTimestamp(DateTime dateTime,
+      ColumnVector columnVector, int index) {
     TimestampColumnVector tcv = (TimestampColumnVector) columnVector;
-    calendar.setTimeInMillis(tcv.getTimestampMilliseconds(index));
+    dateTime = dateTime.withMillis(tcv.getTimestampMilliseconds(index));
     if (isPositive) {
-      calendar.add(Calendar.DATE, numDays);
+      dateTime = dateTime.plusDays(numDays);
     } else {
-      calendar.add(Calendar.DATE, -numDays);
+      dateTime = dateTime.minusDays(numDays);
     }
-    Date newDate = calendar.getTime();
-    text.set(formatter.format(newDate));
+    text.set(jodaDTF.print(dateTime.getMillis()));
     return Arrays.copyOf(text.getBytes(), text.getLength());
   }
 
   protected byte[] evaluateDate(ColumnVector columnVector, int index) {
     LongColumnVector lcv = (LongColumnVector) columnVector;
+    long millis = 0L;
     if (isPositive) {
-      calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] + numDays));
+      millis = DateWritable.daysToMillis((int) lcv.vector[index] + numDays);
     } else {
-      calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] - numDays));
+      millis = DateWritable.daysToMillis((int) lcv.vector[index] - numDays);
     }
-    Date newDate = calendar.getTime();
-    text.set(formatter.format(newDate));
+    text.set(jodaDTF.print(millis));
     return Arrays.copyOf(text.getBytes(), text.getLength());
   }
 
@@ -237,17 +236,24 @@ protected void evaluateString(ColumnVector columnVector, BytesColumnVector outpu
     BytesColumnVector bcv = (BytesColumnVector) columnVector;
     text.set(bcv.vector[i], bcv.start[i], bcv.length[i]);
     try {
-      calendar.setTime(formatter.parse(text.toString()));
-    } catch (ParseException e) {
+      //Consider only date part when HH:mm:ss is provided.
+      String dateString = text.toString();
+      int index = dateString.indexOf(" ");
+      if (!dateString.isEmpty() && index > 0) {
+        dateString = dateString.substring(0, index);
+      }
+
+      DateTime jodaTime = jodaDTF.parseDateTime(dateString);
+      if (isPositive) {
+        jodaTime = jodaTime.plusDays(numDays);
+      } else {
+        jodaTime = jodaTime.minusDays(numDays);
+      }
+      text.set(jodaDTF.print(jodaTime.getMillis()));
+    } catch (IllegalArgumentException e) {
       outputVector.isNull[i] = true;
+      return;
     }
-    if (isPositive) {
-      calendar.add(Calendar.DATE, numDays);
-    } else {
-      calendar.add(Calendar.DATE, -numDays);
-    }
-    Date newDate = calendar.getTime();
-    text.set(formatter.format(newDate));
     byte[] bytes = text.getBytes();
     int size = text.getLength();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
index 2d0a28a..d2b693b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
@@ -24,14 +24,12 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.io.Text;
+import org.joda.time.DateTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
 
-import java.io.UnsupportedEncodingException;
 import java.sql.Timestamp;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.Arrays;
-import java.util.Calendar;
-import java.util.Date;
 
 public class VectorUDFDateAddScalarCol extends VectorExpression {
   private static final long serialVersionUID = 1L;
@@ -42,10 +40,9 @@
   private Timestamp timestampValue = null;
   private byte[] stringValue = null;
   protected boolean isPositive = true;
-  private transient final Calendar calendar = Calendar.getInstance();
-  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient DateTimeFormatter jodaDTF = DateTimeFormat.forPattern("yyyy-MM-dd");
   private transient final Text text = new Text();
-  private transient Date baseDate = new Date();
+  private transient DateTime baseDate = new DateTime();
 
   public VectorUDFDateAddScalarCol() {
     super();
@@ -81,18 +78,27 @@ public void evaluate(VectorizedRowBatch batch) {
     switch (inputTypes[0]) {
       case DATE:
-        baseDate.setTime(DateWritable.daysToMillis((int) longValue));
+        baseDate = baseDate.withMillis(DateWritable.daysToMillis((int) longValue));
+        //baseDate.setTime(DateWritable.daysToMillis((int) longValue));
         break;
 
       case TIMESTAMP:
-        baseDate.setTime(timestampValue.getTime());
+        baseDate = baseDate.withMillis(timestampValue.getTime());
+        //baseDate.setTime(timestampValue.getTime());
         break;
 
       case STRING:
      case CHAR:
      case VARCHAR:
        try {
-          baseDate = formatter.parse(new String(stringValue, "UTF-8"));
+          //Consider only date part when HH:mm:ss is provided.
+          String dateString = new String(stringValue, "UTF-8");
+          int index = dateString.indexOf(" ");
+          if (!dateString.isEmpty() && index > 0) {
+            dateString = dateString.substring(0, index);
+          }
+
+          baseDate = jodaDTF.parseDateTime(dateString);
           break;
        } catch (Exception e) {
          outV.noNulls = false;
@@ -155,16 +161,18 @@ public void evaluate(VectorizedRowBatch batch) {
     }
   }
 
-  private void evaluate(Date baseDate, long numDays, BytesColumnVector output, int i) {
-    calendar.setTime(baseDate);
+  private void evaluate(DateTime baseDate, long numDays, BytesColumnVector output,
+      int i) {
+    //calendar.setTime(baseDate);
     if (isPositive) {
-      calendar.add(Calendar.DATE, (int) numDays);
+      //calendar.add(Calendar.DATE, (int) numDays);
+      baseDate = baseDate.plusDays((int) numDays);
     } else {
-      calendar.add(Calendar.DATE, -(int) numDays);
+      //calendar.add(Calendar.DATE, -(int) numDays);
+      baseDate = baseDate.minusDays((int) numDays);
     }
-    Date newDate = calendar.getTime();
-    text.set(formatter.format(newDate));
+    text.set(jodaDTF.print(baseDate.getMillis()));
     int size = text.getLength();
     output.vector[i] = Arrays.copyOf(text.getBytes(), size);
     output.start[i] = 0;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
index e27ac6a..3766501 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
@@ -18,6 +18,9 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import org.joda.time.DateTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
@@ -37,7 +40,9 @@
   public VectorUDFDateString(int colNum, int outputColumn) {
     super(colNum, outputColumn, new StringUnaryUDF.IUDFUnaryString() {
       Text t = new Text();
-      final transient SimpleDateFormat formatter = DateUtils.getDateFormat();
+      //final transient SimpleDateFormat formatter = DateUtils.getDateFormat();
+      final transient DateTimeFormatter formatter =
+          DateTimeFormat.forPattern("yyyy-MM-dd");
 
       @Override
       public Text evaluate(Text s) {
@@ -45,10 +50,17 @@ public Text evaluate(Text s) {
           return null;
         }
         try {
-          Date date = formatter.parse(s.toString());
-          t.set(formatter.format(date));
+          String dateString = s.toString();
+          int index = dateString.indexOf(" ");
+          if (!dateString.isEmpty() && index > 0) {
+            dateString = dateString.substring(0, index);
+          }
+          DateTime jodaTime = formatter.parseDateTime(dateString);
+          t.set(formatter.print(jodaTime.getMillis()));
+          //Date date = formatter.parse(s.toString());
+          //t.set(formatter.format(date));
           return t;
-        } catch (ParseException e) {
+        } catch (IllegalArgumentException e) {
           return null;
         }
       }
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java
index 92a72bf..4b4af81 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java
@@ -18,10 +18,7 @@
 package org.apache.hadoop.hive.ql.udf.generic;
 
 import java.sql.Timestamp;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.Calendar;
-import java.util.Date;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
@@ -45,6 +42,9 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.IntWritable;
+import org.joda.time.DateTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
 
 /**
  * UDFDateAdd.
@@ -67,7 +67,7 @@
         + " '2009-07-31'")
 @VectorizedExpressions({VectorUDFDateAddColScalar.class, VectorUDFDateAddScalarCol.class, VectorUDFDateAddColCol.class})
 public class GenericUDFDateAdd extends GenericUDF {
-  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient DateTimeFormatter jodaDTF = DateTimeFormat.forPattern("yyyy-MM-dd");
   private transient Converter dateConverter;
   private transient Converter daysConverter;
   private transient PrimitiveCategory inputType1;
@@ -169,11 +169,18 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
     case STRING:
       String dateString = dateConverter.convert(arguments[0].get()).toString();
       try {
-        calendar.setTime(formatter.parse(dateString.toString()));
-      } catch (ParseException e) {
+        //Consider only date part when HH:mm:ss is provided.
+        if (dateString.indexOf(" ") > 0) {
+          dateString = dateString.substring(0, dateString.indexOf(" "));
+        }
+
+        DateTime jodaTime = jodaDTF.parseDateTime(dateString);
+        jodaTime = jodaTime.plusDays(toBeAdded);
+        output.set(jodaDTF.print(jodaTime.getMillis()));
+        return output;
+      } catch (IllegalArgumentException e) {
         return null;
       }
-      break;
     case TIMESTAMP:
       Timestamp ts = ((TimestampWritable) dateConverter.convert(arguments[0].get()))
          .getTimestamp();
@@ -189,8 +196,7 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
     }
 
     calendar.add(Calendar.DAY_OF_MONTH, toBeAdded);
-    Date newDate = calendar.getTime();
-    text.set(formatter.format(newDate));
+    output.set(jodaDTF.print(calendar.getTimeInMillis()));
     return output;
   }
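
For reference, a minimal standalone sketch (not part of the patch; the class name JodaDateAddSketch is invented for illustration) of the Joda-Time parse/adjust/print pattern these expressions rely on, assuming joda-time is on the classpath. It also illustrates why results of plusDays()/minusDays()/withMillis() must be reassigned: DateTime is immutable.

import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

public class JodaDateAddSketch {
  private static final DateTimeFormatter DATE_FORMAT =
      DateTimeFormat.forPattern("yyyy-MM-dd");

  public static void main(String[] args) {
    String input = "2009-07-30 12:58:59";

    // Consider only the date part when HH:mm:ss is provided,
    // mirroring the substring-before-space handling in the patch.
    int space = input.indexOf(' ');
    String datePart = space > 0 ? input.substring(0, space) : input;

    // parseDateTime throws IllegalArgumentException on malformed input,
    // which is why the patch catches that instead of ParseException.
    DateTime base = DATE_FORMAT.parseDateTime(datePart);

    // DateTime is immutable: plusDays() returns a new instance and the
    // result must be reassigned; discarding it makes the call a no-op.
    DateTime shifted = base.plusDays(1);

    System.out.println(DATE_FORMAT.print(shifted.getMillis())); // 2009-07-31
  }
}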