diff --git a/common/src/java/org/apache/hive/common/util/DateParser.java b/common/src/java/org/apache/hive/common/util/DateParser.java new file mode 100644 index 0000000..958b237 --- /dev/null +++ b/common/src/java/org/apache/hive/common/util/DateParser.java @@ -0,0 +1,46 @@ +package org.apache.hive.common.util; + +import java.sql.Date; + +import org.joda.time.MutableDateTime; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; + +/** + * Date parser for yyyy-MM-dd date strings, backed by a Joda DateTimeFormatter. Not thread-safe: parsing reuses a shared MutableDateTime. + */ +public class DateParser { + protected DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd"); + MutableDateTime mdt = new MutableDateTime(0); + + public Date parseDate(String strValue) { + Date result = new Date(0); + if (parseDate(strValue, result)) { + return result; + } + return null; + } + + public boolean parseDate(String strValue, Date result) { + if (parseDate(strValue, mdt)) { + result.setTime(mdt.getMillis()); + return true; + } + return false; + } + + protected boolean parseDate(String strValue, MutableDateTime mdt) { + // Reset time-related fields + mdt.setTime(0); + + try { + int status = fmt.parseInto(mdt, strValue.trim(), 0); + if (status < 0) { + return false; + } + return true; + } catch (Exception err) { + return false; + } + } +} diff --git a/common/src/test/org/apache/hive/common/util/TestDateParser.java b/common/src/test/org/apache/hive/common/util/TestDateParser.java new file mode 100644 index 0000000..4f12adc --- /dev/null +++ b/common/src/test/org/apache/hive/common/util/TestDateParser.java @@ -0,0 +1,49 @@ +package org.apache.hive.common.util; + +import static org.junit.Assert.*; +import org.junit.Test; + +import java.sql.Date; + +public class TestDateParser { + DateParser parser = new DateParser(); + Date date = new Date(0); + + void checkValidCase(String strValue, Date expected) { + Date dateValue = parser.parseDate(strValue); + assertEquals(expected, dateValue); + + assertTrue(parser.parseDate(strValue, date)); + assertEquals(expected, date); + } + + void checkInvalidCase(String strValue) { + Date dateValue = parser.parseDate(strValue); + assertNull(dateValue); + + assertFalse(parser.parseDate(strValue, date)); + } + + @Test + public void testValidCases() throws Exception { + checkValidCase("1945-12-31", Date.valueOf("1945-12-31")); + checkValidCase("1946-01-01", Date.valueOf("1946-01-01")); + checkValidCase("2001-11-12", Date.valueOf("2001-11-12")); + + // Timestamp strings should parse ok + checkValidCase("2001-11-12 01:02:03", Date.valueOf("2001-11-12")); + + // Leading spaces + checkValidCase(" 1946-01-01", Date.valueOf("1946-01-01")); + checkValidCase(" 2001-11-12 01:02:03", Date.valueOf("2001-11-12")); + } + + @Test + public void testInvalidCases() throws Exception { + checkInvalidCase("2001"); + checkInvalidCase("2001-01"); + checkInvalidCase("abc"); + checkInvalidCase(" 2001 "); + checkInvalidCase("a2001-01-01"); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index 98fdf4a..e456b12 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -23,9 +23,9 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; +import
org.apache.hive.common.util.DateParser; -import java.util.Date; -import java.text.SimpleDateFormat; +import java.nio.charset.StandardCharsets; /** * Casts a string vector to a date vector. @@ -36,7 +36,7 @@ private int inputColumn; private int outputColumn; private transient java.sql.Date sqlDate = new java.sql.Date(0); - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private transient DateParser dateParser = new DateParser(); public CastStringToDate() { @@ -115,15 +115,15 @@ public void evaluate(VectorizedRowBatch batch) { } private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) { - try { - Date utilDate = formatter.parse(new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8")); - sqlDate.setTime(utilDate.getTime()); + String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8); + if (dateParser.parseDate(dateString, sqlDate)) { outV.vector[i] = DateWritable.dateToDays(sqlDate); - } catch (Exception e) { - outV.vector[i] = 1; - outV.isNull[i] = true; - outV.noNulls = false; + return; } + + outV.vector[i] = 1; + outV.isNull[i] = true; + outV.noNulls = false; } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java index 05dd93e..4ce6e20 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java @@ -26,12 +26,10 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateParser; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.util.Arrays; -import java.util.Calendar; -import java.util.Date; +import java.sql.Date; public class VectorUDFDateAddColCol extends VectorExpression { private static final long serialVersionUID = 1L; @@ -40,9 +38,9 @@ private int colNum2; private int outputColumn; protected boolean isPositive = true; - private transient final Calendar calendar = Calendar.getInstance(); - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient final Text text = new Text(); + private transient final Date date = new Date(0); + private transient final DateParser dateParser = new DateParser(); public VectorUDFDateAddColCol(int colNum1, int colNum2, int outputColumn) { this(); @@ -68,8 +66,8 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector2 = inputColVector2.vector; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; - byte[][] outputVector = outV.vector; + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + long[] outputVector = outV.vector; if (n <= 0) { // Nothing to do return; @@ -86,20 +84,14 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. 
outV.isRepeating = true; outputVector[0] = evaluateDate(inputColVector1, 0, vector2[0]); - outV.start[0] = 0; - outV.length[0] = outputVector[0].length; } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = evaluateDate(inputColVector1, i, vector2[i]); - outV.start[i] = 0; - outV.length[i] = outputVector[0].length; } } else { for (int i = 0; i != n; i++) { outputVector[i] = evaluateDate(inputColVector1, i, vector2[i]); - outV.start[i] = 0; - outV.length[i] = outputVector[0].length; } } break; @@ -115,14 +107,10 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = evaluateTimestamp(inputColVector1, i, vector2[i]); - outV.start[i] = 0; - outV.length[i] = outputVector[0].length; } } else { for (int i = 0; i != n; i++) { outputVector[i] = evaluateTimestamp(inputColVector1, i, vector2[i]); - outV.start[i] = 0; - outV.length[i] = outputVector[0].length; } } break; @@ -135,15 +123,15 @@ public void evaluate(VectorizedRowBatch batch) { // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; - evaluateString((BytesColumnVector) inputColVector1, inputColVector2, outV, 0); + evaluateString((BytesColumnVector) inputColVector1, outV, 0, vector2[0]); } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - evaluateString((BytesColumnVector) inputColVector1, inputColVector2, outV, i); + evaluateString((BytesColumnVector) inputColVector1, outV, i, vector2[i]); } } else { for (int i = 0; i != n; i++) { - evaluateString((BytesColumnVector) inputColVector1, inputColVector2, outV, i); + evaluateString((BytesColumnVector) inputColVector1, outV, i, vector2[i]); } } break; @@ -152,55 +140,48 @@ public void evaluate(VectorizedRowBatch batch) { } } - protected byte[] evaluateDate(ColumnVector columnVector, int index, long numDays) { + protected long evaluateDate(ColumnVector columnVector, int index, long numDays) { LongColumnVector lcv = (LongColumnVector) columnVector; + long days = lcv.vector[index]; if (isPositive) { - calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] + (int) numDays)); + days += numDays; } else { - calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] - (int) numDays)); + days -= numDays; } - Date newDate = calendar.getTime(); - text.set(formatter.format(newDate)); - return Arrays.copyOf(text.getBytes(), text.getLength()); + return days; } - protected byte[] evaluateTimestamp(ColumnVector columnVector, int index, long numDays) { + protected long evaluateTimestamp(ColumnVector columnVector, int index, long numDays) { TimestampColumnVector tcv = (TimestampColumnVector) columnVector; - calendar.setTimeInMillis(tcv.getTime(index)); + // Convert to date value (in days) + long days = DateWritable.millisToDays(tcv.getTime(index)); if (isPositive) { - calendar.add(Calendar.DATE, (int) numDays); + days += numDays; } else { - calendar.add(Calendar.DATE, (int) -numDays); + days -= numDays; } - Date newDate = calendar.getTime(); - text.set(formatter.format(newDate)); - return Arrays.copyOf(text.getBytes(), text.getLength()); + return days; } - protected void evaluateString(BytesColumnVector inputColumnVector1, LongColumnVector inputColumnVector2, - BytesColumnVector outputVector, int i) { - if (inputColumnVector1.isNull[i] || inputColumnVector2.isNull[i]) { + protected void evaluateString(BytesColumnVector inputColumnVector1, LongColumnVector outputVector, int index, 
long numDays) { + if (inputColumnVector1.isNull[index]) { outputVector.noNulls = false; - outputVector.isNull[i] = true; + outputVector.isNull[index] = true; } else { - text.set(inputColumnVector1.vector[i], inputColumnVector1.start[i], inputColumnVector1.length[i]); - try { - calendar.setTime(formatter.parse(text.toString())); - } catch (ParseException e) { + text.set(inputColumnVector1.vector[index], inputColumnVector1.start[index], inputColumnVector1.length[index]); + boolean parsed = dateParser.parseDate(text.toString(), date); + if (!parsed) { outputVector.noNulls = false; - outputVector.isNull[i] = true; + outputVector.isNull[index] = true; + return; } + long days = DateWritable.millisToDays(date.getTime()); if (isPositive) { - calendar.add(Calendar.DATE, (int) inputColumnVector2.vector[i]); + days += numDays; } else { - calendar.add(Calendar.DATE, -(int) inputColumnVector2.vector[i]); + days -= numDays; } - Date newDate = calendar.getTime(); - text.set(formatter.format(newDate)); - - outputVector.vector[i] = Arrays.copyOf(text.getBytes(), text.getLength()); - outputVector.start[i] = 0; - outputVector.length[i] = text.getLength(); + outputVector.vector[index] = days; } } @@ -211,7 +192,7 @@ public int getOutputColumn() { @Override public String getOutputType() { - return "string"; + return "date"; } public int getColNum1() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java index 59ca61e..0e09f49 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java @@ -26,12 +26,9 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateParser; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Date; +import java.sql.Date; public class VectorUDFDateAddColScalar extends VectorExpression { private static final long serialVersionUID = 1L; @@ -40,9 +37,9 @@ private int outputColumn; private int numDays; protected boolean isPositive = true; - private transient final Calendar calendar = Calendar.getInstance(); - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient final Text text = new Text(); + private transient final DateParser dateParser = new DateParser(); + private transient final Date date = new Date(0); public VectorUDFDateAddColScalar(int colNum, long numDays, int outputColumn) { super(); @@ -62,7 +59,7 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; ColumnVector inputCol = batch.cols[this.colNum]; /* every line below this is identical for evaluateLong & evaluateString */ final int n = inputCol.isRepeating ? 
1 : batch.size; @@ -85,14 +82,10 @@ public void evaluate(VectorizedRowBatch batch) { for(int j=0; j < n; j++) { int i = sel[j]; outV.vector[i] = evaluateDate(inputCol, i); - outV.start[i] = 0; - outV.length[i] = outV.vector[i].length; } } else { for(int i = 0; i < n; i++) { outV.vector[i] = evaluateDate(inputCol, i); - outV.start[i] = 0; - outV.length[i] = outV.vector[i].length; } } } else { @@ -105,8 +98,6 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { outV.vector[i] = evaluateDate(inputCol, i); - outV.start[i] = 0; - outV.length[i] = outV.vector[i].length; } } } else { @@ -114,8 +105,6 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { outV.vector[i] = evaluateDate(inputCol, i); - outV.start[i] = 0; - outV.length[i] = outV.vector[i].length; } } } @@ -129,14 +118,10 @@ public void evaluate(VectorizedRowBatch batch) { for(int j=0; j < n; j++) { int i = sel[j]; outV.vector[i] = evaluateTimestamp(inputCol, i); - outV.start[i] = 0; - outV.length[i] = outV.vector[i].length; } } else { for(int i = 0; i < n; i++) { outV.vector[i] = evaluateTimestamp(inputCol, i); - outV.start[i] = 0; - outV.length[i] = outV.vector[i].length; } } } else { @@ -149,8 +134,6 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { outV.vector[i] = evaluateTimestamp(inputCol, i); - outV.start[i] = 0; - outV.length[i] = outV.vector[i].length; } } } else { @@ -158,8 +141,6 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { outV.vector[i] = evaluateTimestamp(inputCol, i); - outV.start[i] = 0; - outV.length[i] = outV.vector[i].length; } } } @@ -208,52 +189,45 @@ public void evaluate(VectorizedRowBatch batch) { } } - protected byte[] evaluateTimestamp(ColumnVector columnVector, int index) { + protected long evaluateTimestamp(ColumnVector columnVector, int index) { TimestampColumnVector tcv = (TimestampColumnVector) columnVector; - calendar.setTimeInMillis(tcv.getTime(index)); + // Convert to date value (in days) + long days = DateWritable.millisToDays(tcv.getTime(index)); if (isPositive) { - calendar.add(Calendar.DATE, numDays); + days += numDays; } else { - calendar.add(Calendar.DATE, -numDays); + days -= numDays; } - Date newDate = calendar.getTime(); - text.set(formatter.format(newDate)); - return Arrays.copyOf(text.getBytes(), text.getLength()); + return days; } - protected byte[] evaluateDate(ColumnVector columnVector, int index) { + protected long evaluateDate(ColumnVector columnVector, int index) { LongColumnVector lcv = (LongColumnVector) columnVector; + long days = lcv.vector[index]; if (isPositive) { - calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] + numDays)); + days += numDays; } else { - calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] - numDays)); + days -= numDays; } - Date newDate = calendar.getTime(); - text.set(formatter.format(newDate)); - return Arrays.copyOf(text.getBytes(), text.getLength()); + return days; } - protected void evaluateString(ColumnVector columnVector, BytesColumnVector outputVector, int i) { + protected void evaluateString(ColumnVector columnVector, LongColumnVector outputVector, int i) { BytesColumnVector bcv = (BytesColumnVector) columnVector; text.set(bcv.vector[i], bcv.start[i], bcv.length[i]); - try { - calendar.setTime(formatter.parse(text.toString())); - } catch 
(ParseException e) { + boolean parsed = dateParser.parseDate(text.toString(), date); + if (!parsed) { + outputVector.noNulls = false; outputVector.isNull[i] = true; + return; } + long days = DateWritable.millisToDays(date.getTime()); if (isPositive) { - calendar.add(Calendar.DATE, numDays); + days += numDays; } else { - calendar.add(Calendar.DATE, -numDays); + days -= numDays; } - Date newDate = calendar.getTime(); - text.set(formatter.format(newDate)); - - byte[] bytes = text.getBytes(); - int size = text.getLength(); - outputVector.vector[i] = Arrays.copyOf(bytes, size); - outputVector.start[i] = 0; - outputVector.length[i] = size; + outputVector.vector[i] = days; } @Override @@ -263,7 +237,7 @@ public int getOutputColumn() { @Override public String getOutputType() { - return "string"; + return "date"; } public int getColNum() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 2d0a28a..724ea45 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -18,20 +18,16 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateParser; -import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; +import java.sql.Date; import java.sql.Timestamp; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Date; + public class VectorUDFDateAddScalarCol extends VectorExpression { private static final long serialVersionUID = 1L; @@ -42,10 +38,8 @@ private Timestamp timestampValue = null; private byte[] stringValue = null; protected boolean isPositive = true; - private transient final Calendar calendar = Calendar.getInstance(); - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private transient final Text text = new Text(); - private transient Date baseDate = new Date(); + private transient final DateParser dateParser = new DateParser(); + private transient final Date baseDate = new Date(0); public VectorUDFDateAddScalarCol() { super(); @@ -77,7 +71,7 @@ public void evaluate(VectorizedRowBatch batch) { final int n = inputCol.isRepeating ? 
1 : batch.size; int[] sel = batch.selected; final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; switch (inputTypes[0]) { case DATE: @@ -91,10 +85,8 @@ public void evaluate(VectorizedRowBatch batch) { case STRING: case CHAR: case VARCHAR: - try { - baseDate = formatter.parse(new String(stringValue, "UTF-8")); - break; - } catch (Exception e) { + boolean parsed = dateParser.parseDate(new String(stringValue, StandardCharsets.UTF_8), baseDate); + if (!parsed) { outV.noNulls = false; if (selectedInUse) { for(int j=0; j < n; j++) { @@ -108,6 +100,7 @@ public void evaluate(VectorizedRowBatch batch) { } return; } + break; default: throw new Error("Unsupported input type " + inputTypes[0].name()); } @@ -120,16 +113,17 @@ public void evaluate(VectorizedRowBatch batch) { /* true for all algebraic UDFs with no state */ outV.isRepeating = inputCol.isRepeating; + long baseDateDays = DateWritable.millisToDays(baseDate.getTime()); if (inputCol.noNulls) { outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; - evaluate(baseDate, inputCol.vector[i], outV, i); + evaluate(baseDateDays, inputCol.vector[i], outV, i); } } else { for(int i = 0; i < n; i++) { - evaluate(baseDate, inputCol.vector[i], outV, i); + evaluate(baseDateDays, inputCol.vector[i], outV, i); } } } else { @@ -141,34 +135,28 @@ public void evaluate(VectorizedRowBatch batch) { int i = sel[j]; outV.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { - evaluate(baseDate, inputCol.vector[i], outV, i); + evaluate(baseDateDays, inputCol.vector[i], outV, i); } } } else { for(int i = 0; i < n; i++) { outV.isNull[i] = inputCol.isNull[i]; if (!inputCol.isNull[i]) { - evaluate(baseDate, inputCol.vector[i], outV, i); + evaluate(baseDateDays, inputCol.vector[i], outV, i); } } } } } - private void evaluate(Date baseDate, long numDays, BytesColumnVector output, int i) { - calendar.setTime(baseDate); - + private void evaluate(long baseDateDays, long numDays, LongColumnVector output, int i) { + long result = baseDateDays; if (isPositive) { - calendar.add(Calendar.DATE, (int) numDays); + result += numDays; } else { - calendar.add(Calendar.DATE, -(int) numDays); + result -= numDays; } - Date newDate = calendar.getTime(); - text.set(formatter.format(newDate)); - int size = text.getLength(); - output.vector[i] = Arrays.copyOf(text.getBytes(), size); - output.start[i] = 0; - output.length[i] = size; + output.vector[i] = result; } @Override @@ -178,7 +166,7 @@ public int getOutputColumn() { @Override public String getOutputType() { - return "string"; + return "date"; } public int getColNum() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java index a58bfb5..3fd2e9c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java @@ -26,12 +26,13 @@ import java.sql.Date; import java.text.SimpleDateFormat; -public class VectorUDFDateLong extends LongToStringUnaryUDF { +/** + * Vectorized version of TO_DATE(DATE). + * As TO_DATE() now returns DATE type, this should be the same behavior as the DATE cast operator. 
+ */ +public class VectorUDFDateLong extends CastLongToDate { private static final long serialVersionUID = 1L; - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private transient Date date = new Date(0); - public VectorUDFDateLong() { super(); } @@ -39,35 +40,4 @@ public VectorUDFDateLong() { public VectorUDFDateLong(int inputColumn, int outputColumn) { super(inputColumn, outputColumn); } - - @Override - protected void func(BytesColumnVector outV, long[] vector, int i) { - switch (inputTypes[0]) { - case DATE: - date.setTime(DateWritable.daysToMillis((int) vector[i])); - break; - - default: - throw new Error("Unsupported input type " + inputTypes[0].name()); - } - try { - byte[] bytes = formatter.format(date).getBytes("UTF-8"); - outV.setRef(i, bytes, 0, bytes.length); - } catch (UnsupportedEncodingException e) { - outV.vector[i] = null; - outV.isNull[i] = true; - } - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(1) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.DATE) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN); - return b.build(); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java index e27ac6a..7ae03d5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java @@ -18,44 +18,29 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hive.common.util.DateParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator; -import org.apache.hadoop.io.Text; -import org.apache.hive.common.util.DateUtils; +import java.sql.Date; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.text.ParseException; - -public class VectorUDFDateString extends StringUnaryUDF { +/** + * Vectorized version of TO_DATE(STRING) + * As TO_DATE() now returns DATE type, this should be the same behavior as the DATE cast operator. 
+ */ +public class VectorUDFDateString extends CastStringToDate { private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger( - VectorUDFDateString.class.getName()); - - public VectorUDFDateString(int colNum, int outputColumn) { - super(colNum, outputColumn, new StringUnaryUDF.IUDFUnaryString() { - Text t = new Text(); - final transient SimpleDateFormat formatter = DateUtils.getDateFormat(); + public VectorUDFDateString() { - @Override - public Text evaluate(Text s) { - if (s == null) { - return null; - } - try { - Date date = formatter.parse(s.toString()); - t.set(formatter.format(date)); - return t; - } catch (ParseException e) { - return null; - } - } - }); } - public VectorUDFDateString() { - super(); + public VectorUDFDateString(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateTimestamp.java index cde0be4..f0158dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateTimestamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateTimestamp.java @@ -27,12 +27,13 @@ import java.sql.Date; import java.text.SimpleDateFormat; -public class VectorUDFDateTimestamp extends TimestampToStringUnaryUDF { +/** + * Vectorized version of TO_DATE(timestamp). + * As TO_DATE() now returns DATE type, this should be the same behavior as the DATE cast operator. + */ +public class VectorUDFDateTimestamp extends CastTimestampToDate { private static final long serialVersionUID = 1L; - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private transient Date date = new Date(0); - public VectorUDFDateTimestamp() { super(); } @@ -40,35 +41,4 @@ public VectorUDFDateTimestamp() { public VectorUDFDateTimestamp(int inputColumn, int outputColumn) { super(inputColumn, outputColumn); } - - @Override - protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) { - switch (inputTypes[0]) { - case TIMESTAMP: - date.setTime(inV.getTime(i)); - break; - - default: - throw new Error("Unsupported input type " + inputTypes[0].name()); - } - try { - byte[] bytes = formatter.format(date).getBytes("UTF-8"); - outV.setRef(i, bytes, 0, bytes.length); - } catch (UnsupportedEncodingException e) { - outV.vector[i] = null; - outV.isNull[i] = true; - } - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(1) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.TIMESTAMP) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN); - return b.build(); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java index 8c376a0..5a31e61 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java @@ -17,10 +17,8 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import java.sql.Date; import java.sql.Timestamp; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Date; import org.apache.hadoop.hive.ql.exec.Description; 
import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -40,7 +38,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.DateParser; /** * UDFDate. @@ -53,13 +51,14 @@ + " '2009-07-30'") @VectorizedExpressions({VectorUDFDateString.class, VectorUDFDateLong.class, VectorUDFDateTimestamp.class}) public class GenericUDFDate extends GenericUDF { - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient TimestampConverter timestampConverter; private transient Converter textConverter; private transient Converter dateWritableConverter; private transient PrimitiveCategory inputType; private transient PrimitiveObjectInspector argumentOI; - private final Text output = new Text(); + private transient DateParser dateParser = new DateParser(); + private transient final DateWritable output = new DateWritable(); + private transient final Date date = new Date(0); @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -73,7 +72,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen } argumentOI = (PrimitiveObjectInspector) arguments[0]; inputType = argumentOI.getPrimitiveCategory(); - ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector; switch (inputType) { case VOID: break; @@ -109,23 +108,21 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { case VOID: throw new UDFArgumentException("TO_DATE() received non-null object of VOID type"); case STRING: - Date date; String dateString = textConverter.convert(arguments[0].get()).toString(); - try { - date = formatter.parse(dateString); - } catch (ParseException e) { + if (dateParser.parseDate(dateString, date)) { + output.set(date); + } else { return null; } - output.set(formatter.format(date)); break; case TIMESTAMP: Timestamp ts = ((TimestampWritable) timestampConverter.convert(arguments[0].get())) .getTimestamp(); - output.set(formatter.format(ts)); + output.set(DateWritable.millisToDays(ts.getTime())); break; case DATE: DateWritable dw = (DateWritable) dateWritableConverter.convert(arguments[0].get()); - output.set(formatter.format(dw.get())); + output.set(dw); break; default: throw new UDFArgumentException( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java index 92a72bf..76337a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java @@ -17,11 +17,8 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import java.sql.Date; import java.sql.Timestamp; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -43,8 +40,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.IntWritable; +import org.apache.hive.common.util.DateParser; /** * UDFDateAdd. @@ -67,13 +64,14 @@ + " '2009-07-31'") @VectorizedExpressions({VectorUDFDateAddColScalar.class, VectorUDFDateAddScalarCol.class, VectorUDFDateAddColCol.class}) public class GenericUDFDateAdd extends GenericUDF { - private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private transient final DateParser dateParser = new DateParser(); + private transient final Date dateVal = new Date(0); private transient Converter dateConverter; private transient Converter daysConverter; private transient PrimitiveCategory inputType1; private transient PrimitiveCategory inputType2; - private final Calendar calendar = Calendar.getInstance(); - private final Text output = new Text(); + private final DateWritable output = new DateWritable(); + protected int signModifier = 1; // 1 for addition, -1 for subtraction @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -93,7 +91,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen } inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory(); - ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector; switch (inputType1) { case STRING: case VARCHAR: @@ -165,32 +163,32 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { return null; } + // Convert the first param into a DateWritable value switch (inputType1) { case STRING: String dateString = dateConverter.convert(arguments[0].get()).toString(); - try { - calendar.setTime(formatter.parse(dateString.toString())); - } catch (ParseException e) { + if (dateParser.parseDate(dateString, dateVal)) { + output.set(dateVal); + } else { return null; } break; case TIMESTAMP: Timestamp ts = ((TimestampWritable) dateConverter.convert(arguments[0].get())) .getTimestamp(); - calendar.setTime(ts); + output.set(DateWritable.millisToDays(ts.getTime())); break; case DATE: DateWritable dw = (DateWritable) dateConverter.convert(arguments[0].get()); - calendar.setTime(dw.get()); + output.set(dw.getDays()); break; default: throw new UDFArgumentException( "DATE_ADD() only takes STRING/TIMESTAMP/DATEWRITABLE types, got " + inputType1); } - calendar.add(Calendar.DAY_OF_MONTH, toBeAdded); - Date newDate = calendar.getTime(); - output.set(formatter.format(newDate)); + int newDays = output.getDays() + (signModifier * toBeAdded); + output.set(newDays); return output; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java index db0f4cd..74ea202 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java @@ -66,132 +66,11 @@ + " > SELECT _FUNC_('2009-07-30', 1) FROM src LIMIT 1;\n" + " '2009-07-29'") @VectorizedExpressions({VectorUDFDateSubColScalar.class, VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class}) -public class GenericUDFDateSub extends GenericUDF { +public class GenericUDFDateSub extends 
GenericUDF { +public class GenericUDFDateSub extends GenericUDFDateAdd { private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private transient Converter dateConverter; - private transient Converter daysConverter; - private transient PrimitiveCategory inputType1; - private transient PrimitiveCategory inputType2; - private final Calendar calendar = Calendar.getInstance(); - private final Text output = new Text(); - @Override - public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 2) { - throw new UDFArgumentLengthException( - "date_sub() requires 2 argument, got " + arguments.length); - } - if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { - throw new UDFArgumentTypeException(0, - "Only primitive type arguments are accepted but " - + arguments[0].getTypeName() + " is passed. as first arguments"); - } - if (arguments[1].getCategory() != ObjectInspector.Category.PRIMITIVE) { - throw new UDFArgumentTypeException(1, - "Only primitive type arguments are accepted but " - + arguments[1].getTypeName() + " is passed. as second arguments"); - } - - inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory(); - ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; - switch (inputType1) { - case STRING: - case VARCHAR: - case CHAR: - inputType1 = PrimitiveCategory.STRING; - dateConverter = ObjectInspectorConverters.getConverter( - (PrimitiveObjectInspector) arguments[0], - PrimitiveObjectInspectorFactory.writableStringObjectInspector); - break; - case TIMESTAMP: - dateConverter = new TimestampConverter((PrimitiveObjectInspector) arguments[0], - PrimitiveObjectInspectorFactory.writableTimestampObjectInspector); - break; - case DATE: - dateConverter = ObjectInspectorConverters.getConverter( - (PrimitiveObjectInspector) arguments[0], - PrimitiveObjectInspectorFactory.writableDateObjectInspector); - break; - default: - throw new UDFArgumentException( - "DATE_SUB() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got " - + inputType1); - } - - inputType2 = ((PrimitiveObjectInspector) arguments[1]).getPrimitiveCategory(); - switch (inputType2) { - case BYTE: - daysConverter = ObjectInspectorConverters.getConverter( - (PrimitiveObjectInspector) arguments[1], - PrimitiveObjectInspectorFactory.writableByteObjectInspector); - break; - case SHORT: - daysConverter = ObjectInspectorConverters.getConverter( - (PrimitiveObjectInspector) arguments[1], - PrimitiveObjectInspectorFactory.writableShortObjectInspector); - break; - case INT: - daysConverter = ObjectInspectorConverters.getConverter( - (PrimitiveObjectInspector) arguments[1], - PrimitiveObjectInspectorFactory.writableIntObjectInspector); - break; - default: - throw new UDFArgumentException( - " DATE_ADD() only takes TINYINT/SMALLINT/INT/BIGINT types as second argument, got " + inputType2); - } - - return outputOI; - } - - @Override - public Object evaluate(DeferredObject[] arguments) throws HiveException { - if (arguments[0].get() == null) { - return null; - } - - Object daysWritableObject = daysConverter.convert(arguments[1].get()); - if (daysWritableObject == null) { - return null; - } - - int toBeSubed; - if (daysWritableObject instanceof ByteWritable) { - toBeSubed = ((ByteWritable) daysWritableObject).get(); - } else if (daysWritableObject instanceof ShortWritable) { - toBeSubed = ((ShortWritable) daysWritableObject).get(); - } else if (daysWritableObject instanceof IntWritable) { - toBeSubed = ((IntWritable) 
daysWritableObject).get(); - } else { - return null; - } - - switch (inputType1) { - case STRING: - String dateString = dateConverter.convert(arguments[0].get()).toString(); - try { - calendar.setTime(formatter.parse(dateString.toString())); - } catch (ParseException e) { - return null; - } - break; - case TIMESTAMP: - Timestamp ts = ((TimestampWritable) dateConverter.convert(arguments[0].get())) - .getTimestamp(); - calendar.setTime(ts); - break; - case DATE: - DateWritable dw = (DateWritable) dateConverter.convert(arguments[0].get()); - calendar.setTime(dw.get()); - break; - default: - throw new UDFArgumentException( - "DATE_SUB() only takes STRING/TIMESTAMP/DATEWRITABLE types, got " + inputType1); - } - - calendar.add(Calendar.DAY_OF_MONTH, -toBeSubed); - Date newDate = calendar.getTime(); - output.set(formatter.format(newDate)); - return output; + public GenericUDFDateSub() { + this.signModifier = -1; } @Override diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java index 3f2b031..13bfdd7 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java @@ -28,7 +28,8 @@ import org.junit.Assert; import org.junit.Test; -import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; import java.text.SimpleDateFormat; @@ -37,6 +38,7 @@ import java.util.Random; public class TestVectorGenericDateExpressions { + private Charset utf8 = StandardCharsets.UTF_8; private int size = 200; private Random random = new Random(); private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); @@ -88,12 +90,8 @@ private BytesColumnVector toString(LongColumnVector date) { } private byte[] toString(long date) { - try { - String formatted = formatter.format(new Date(DateWritable.daysToMillis((int) date))); - return formatted.getBytes("UTF-8"); - } catch (Exception e) { - throw new RuntimeException(e); - } + String formatted = formatter.format(new Date(DateWritable.daysToMillis((int) date))); + return formatted.getBytes(utf8); } private void validateDateAdd(VectorizedRowBatch batch, VectorExpression.Type colType1, long scalar2, @@ -106,20 +104,20 @@ private void validateDateAdd(VectorizedRowBatch batch, VectorExpression.Type col } udf.setInputTypes(colType1, VectorExpression.Type.OTHER); udf.evaluate(batch); - BytesColumnVector output = (BytesColumnVector) batch.cols[1]; + LongColumnVector output = (LongColumnVector) batch.cols[1]; try { for (int i = 0; i < size; i++) { String expected; if (isPositive) { - expected = new String(toString(date1.vector[i] + scalar2), "UTF-8"); + expected = new String(toString(date1.vector[i] + scalar2), utf8); } else { - expected = new String(toString(date1.vector[i] - scalar2), "UTF-8"); + expected = new String(toString(date1.vector[i] - scalar2), utf8); } if (date1.isNull[i]) { Assert.assertTrue(output.isNull[i]); } else { - String actual = new String(output.vector[i], output.start[i], output.start[i] + output.length[i], "UTF-8"); + String actual = new String(toString(output.vector[i])); Assert.assertEquals("expectedLen:" + expected.length() + " actualLen:" + actual.length(), expected, actual); } } @@ -149,7 +147,7 @@ private void 
testDateAddColScalar(VectorExpression.Type colType1, boolean isPosi LongColumnVector date1 = newRandomLongColumnVector(10000, size); ColumnVector col1 = castTo(date1, colType1); long scalar2 = newRandom(1000); - BytesColumnVector output = new BytesColumnVector(size); + LongColumnVector output = new LongColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(2, size); batch.cols[0] = col1; @@ -169,13 +167,9 @@ public void testDateAddColScalar() { udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new BytesColumnVector(1); + batch.cols[1] = new LongColumnVector(1); BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; - byte[] bytes = new byte[0]; - try { - bytes = "error".getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - } + byte[] bytes = "error".getBytes(utf8); bcv.vector[0] = bytes; bcv.start[0] = 0; bcv.length[0] = bytes.length; @@ -192,13 +186,9 @@ public void testDateSubColScalar() { udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new BytesColumnVector(1); + batch.cols[1] = new LongColumnVector(1); BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; - byte[] bytes = new byte[0]; - try { - bytes = "error".getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - } + byte[] bytes = "error".getBytes(utf8); bcv.vector[0] = bytes; bcv.start[0] = 0; bcv.length[0] = bytes.length; @@ -245,20 +235,19 @@ private void validateDateAdd(VectorizedRowBatch batch, long scalar1, LongColumnV udf.setInputTypes(colType1, VectorExpression.Type.OTHER); udf.evaluate(batch); - BytesColumnVector output = (BytesColumnVector) batch.cols[1]; + LongColumnVector output = (LongColumnVector) batch.cols[1]; try { for (int i = 0; i < date2.vector.length; i++) { String expected; if (isPositive) { - expected = new String(toString(scalar1 + date2.vector[i]), "UTF-8"); + expected = new String(toString(scalar1 + date2.vector[i]), utf8); } else { - expected = new String(toString(scalar1 - date2.vector[i]), "UTF-8"); + expected = new String(toString(scalar1 - date2.vector[i]), utf8); } if (date2.isNull[i]) { Assert.assertTrue(output.isNull[i]); } else { - Assert.assertEquals(expected, - new String(output.vector[i], output.start[i], output.start[i] + output.length[i], "UTF-8")); + Assert.assertEquals(expected, new String(toString(output.vector[i]))); } } } catch (Exception e) { @@ -270,7 +259,7 @@ private void testDateAddScalarCol(VectorExpression.Type colType1, boolean isPosi LongColumnVector date2 = newRandomLongColumnVector(10000, size); long scalar1 = newRandom(1000); - BytesColumnVector output = new BytesColumnVector(size); + LongColumnVector output = new LongColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(2, size); batch.cols[0] = date2; @@ -286,15 +275,11 @@ public void testDateAddScalarCol() { for (VectorExpression.Type scalarType1 : dateTimestampStringTypes) testDateAddScalarCol(scalarType1, true); - VectorExpression udf = null; - try { - udf = new VectorUDFDateAddScalarCol("error".getBytes("UTF-8"), 0, 1); - } catch (UnsupportedEncodingException e) { - } + VectorExpression udf = new VectorUDFDateAddScalarCol("error".getBytes(utf8), 0, 1); udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); VectorizedRowBatch batch = new 
VectorizedRowBatch(2, 1); batch.cols[0] = new LongColumnVector(1); - batch.cols[1] = new BytesColumnVector(1); + batch.cols[1] = new LongColumnVector(1); udf.evaluate(batch); Assert.assertEquals(batch.cols[1].isNull[0], true); } @@ -304,15 +289,11 @@ public void testDateSubScalarCol() { for (VectorExpression.Type scalarType1 : dateTimestampStringTypes) testDateAddScalarCol(scalarType1, false); - VectorExpression udf = null; - try { - udf = new VectorUDFDateSubScalarCol("error".getBytes("UTF-8"), 0, 1); - } catch (UnsupportedEncodingException e) { - } + VectorExpression udf = new VectorUDFDateSubScalarCol("error".getBytes(utf8), 0, 1); udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); batch.cols[0] = new LongColumnVector(1); - batch.cols[1] = new BytesColumnVector(1); + batch.cols[1] = new LongColumnVector(1); udf.evaluate(batch); Assert.assertEquals(batch.cols[1].isNull[0], true); } @@ -328,20 +309,19 @@ private void validateDateAdd(VectorizedRowBatch batch, } udf.setInputTypes(colType1, VectorExpression.Type.OTHER); udf.evaluate(batch); - BytesColumnVector output = (BytesColumnVector) batch.cols[2]; + LongColumnVector output = (LongColumnVector) batch.cols[2]; try { for (int i = 0; i < date2.vector.length; i++) { String expected; if (isPositive) { - expected = new String(toString(date1.vector[i] + date2.vector[i]), "UTF-8"); + expected = new String(toString(date1.vector[i] + date2.vector[i]), utf8); } else { - expected = new String(toString(date1.vector[i] - date2.vector[i]), "UTF-8"); + expected = new String(toString(date1.vector[i] - date2.vector[i]), utf8); } if (date1.isNull[i] || date2.isNull[i]) { Assert.assertTrue(output.isNull[i]); } else { - Assert.assertEquals(expected, - new String(output.vector[i], output.start[i], output.start[i] + output.length[i], "UTF-8")); + Assert.assertEquals(expected, new String(toString(output.vector[i]))); } } } catch (Exception e) { @@ -354,7 +334,7 @@ private void testDateAddColCol(VectorExpression.Type colType1, boolean isPositiv LongColumnVector days2 = newRandomLongColumnVector(1000, size); ColumnVector col1 = castTo(date1, colType1); - BytesColumnVector output = new BytesColumnVector(size); + LongColumnVector output = new LongColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(3, size); batch.cols[0] = col1; @@ -378,16 +358,12 @@ public void testDateAddColCol() { VectorExpression udf = new VectorUDFDateAddColCol(0, 1, 2); VectorizedRowBatch batch = new VectorizedRowBatch(3, 1); BytesColumnVector bcv; - byte[] bytes = new byte[0]; - try { - bytes = "error".getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - } + byte[] bytes = "error".getBytes(utf8); udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); batch.cols[0] = new BytesColumnVector(1); batch.cols[1] = new LongColumnVector(1); - batch.cols[2] = new BytesColumnVector(1); + batch.cols[2] = new LongColumnVector(1); bcv = (BytesColumnVector) batch.cols[0]; bcv.vector[0] = bytes; bcv.start[0] = 0; @@ -404,16 +380,12 @@ public void testDateSubColCol() { VectorExpression udf = new VectorUDFDateSubColCol(0, 1, 2); VectorizedRowBatch batch = new VectorizedRowBatch(3, 1); BytesColumnVector bcv; - byte[] bytes = new byte[0]; - try { - bytes = "error".getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - } + byte[] bytes = "error".getBytes(utf8); udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); batch.cols[0] = 
new BytesColumnVector(1); batch.cols[1] = new LongColumnVector(1); - batch.cols[2] = new BytesColumnVector(1); + batch.cols[2] = new LongColumnVector(1); bcv = (BytesColumnVector) batch.cols[0]; bcv.vector[0] = bytes; bcv.start[0] = 0; @@ -469,11 +441,7 @@ public void testDateDiffScalarCol() { } VectorExpression udf; - byte[] bytes = new byte[0]; - try { - bytes = "error".getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - } + byte[] bytes = "error".getBytes(utf8); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); udf = new VectorUDFDateDiffScalarCol(new Timestamp(0), 0, 1); @@ -540,11 +508,7 @@ public void testDateDiffColScalar() { } } VectorExpression udf; - byte[] bytes = new byte[0]; - try { - bytes = "error".getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - } + byte[] bytes = "error".getBytes(utf8); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); udf = new VectorUDFDateDiffColScalar(0, 0, 1); @@ -609,11 +573,7 @@ public void testDateDiffColCol() { VectorExpression udf = new VectorUDFDateDiffColCol(0, 1, 2); VectorizedRowBatch batch = new VectorizedRowBatch(3, 1); BytesColumnVector bcv; - byte[] bytes = new byte[0]; - try { - bytes = "error".getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - } + byte[] bytes = "error".getBytes(utf8); udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); batch.cols[0] = new BytesColumnVector(1); @@ -650,18 +610,14 @@ private void validateDate(VectorizedRowBatch batch, VectorExpression.Type colTyp udf.setInputTypes(colType); udf.evaluate(batch); - BytesColumnVector output = (BytesColumnVector) batch.cols[1]; + LongColumnVector output = (LongColumnVector) batch.cols[1]; for (int i = 0; i < size; i++) { String actual; if (output.isNull[i]) { actual = null; } else { - try { - actual = new String(output.vector[i], output.start[i], output.length[i], "UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + actual = new String(toString(output.vector[i])); } if (date.isNull[i]) { Assert.assertTrue(output.isNull[i]); @@ -676,7 +632,7 @@ private void validateDate(VectorizedRowBatch batch, VectorExpression.Type colTyp public void testDate() { for (VectorExpression.Type colType : dateTimestampStringTypes) { LongColumnVector date = newRandomLongColumnVector(10000, size); - BytesColumnVector output = new BytesColumnVector(size); + LongColumnVector output = new LongColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(2, size); batch.cols[0] = castTo(date, colType); @@ -692,13 +648,9 @@ public void testDate() { udf.setInputTypes(VectorExpression.Type.STRING); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new BytesColumnVector(1); + batch.cols[1] = new LongColumnVector(1); BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; - byte[] bytes = new byte[0]; - try { - bytes = "error".getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - } + byte[] bytes = "error".getBytes(utf8); bcv.vector[0] = bytes; bcv.start[0] = 0; bcv.length[0] = bytes.length; @@ -755,11 +707,7 @@ public void testToDate() { batch.cols[0] = new BytesColumnVector(1); batch.cols[1] = new LongColumnVector(1); BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; - byte[] bytes = new byte[0]; - try { - bytes = "error".getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - } + byte[] bytes = "error".getBytes(utf8); bcv.vector[0] = bytes; bcv.start[0] = 0; 
bcv.length[0] = bytes.length; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDate.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDate.java index c9f566d..7706c07 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDate.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDate.java @@ -43,13 +43,13 @@ public void testStringToDate() throws HiveException { udf.initialize(arguments); DeferredObject valueObj = new DeferredJavaObject(new Text("2009-07-30")); DeferredObject[] args = {valueObj}; - Text output = (Text) udf.evaluate(args); + DateWritable output = (DateWritable) udf.evaluate(args); assertEquals("to_date() test for STRING failed ", "2009-07-30", output.toString()); // Try with null args DeferredObject[] nullArgs = { new DeferredJavaObject(null) }; - output = (Text) udf.evaluate(nullArgs); + output = (DateWritable) udf.evaluate(nullArgs); assertNull("to_date() with null STRING", output); } @@ -62,13 +62,13 @@ public void testTimestampToDate() throws HiveException { DeferredObject valueObj = new DeferredJavaObject(new TimestampWritable(new Timestamp(109, 06, 30, 4, 17, 52, 0))); DeferredObject[] args = {valueObj}; - Text output = (Text) udf.evaluate(args); + DateWritable output = (DateWritable) udf.evaluate(args); assertEquals("to_date() test for TIMESTAMP failed ", "2009-07-30", output.toString()); // Try with null args DeferredObject[] nullArgs = { new DeferredJavaObject(null) }; - output = (Text) udf.evaluate(nullArgs); + output = (DateWritable) udf.evaluate(nullArgs); assertNull("to_date() with null TIMESTAMP", output); } @@ -80,13 +80,13 @@ public void testDateWritablepToDate() throws HiveException { udf.initialize(arguments); DeferredObject valueObj = new DeferredJavaObject(new DateWritable(new Date(109, 06, 30))); DeferredObject[] args = {valueObj}; - Text output = (Text) udf.evaluate(args); + DateWritable output = (DateWritable) udf.evaluate(args); assertEquals("to_date() test for DATEWRITABLE failed ", "2009-07-30", output.toString()); // Try with null args DeferredObject[] nullArgs = { new DeferredJavaObject(null) }; - output = (Text) udf.evaluate(nullArgs); + output = (DateWritable) udf.evaluate(nullArgs); assertNull("to_date() with null DATE", output); } @@ -97,7 +97,7 @@ public void testVoidToDate() throws HiveException { udf.initialize(arguments); DeferredObject[] args = { new DeferredJavaObject(null) }; - Text output = (Text) udf.evaluate(args); + DateWritable output = (DateWritable) udf.evaluate(args); // Try with null VOID assertNull("to_date() with null DATE ", output); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateAdd.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateAdd.java index 08b9604..6dc4c34 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateAdd.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateAdd.java @@ -44,7 +44,7 @@ public void testStringToDate() throws HiveException { DeferredObject valueObj1 = new DeferredJavaObject(new Text("2009-07-20 04:17:52")); DeferredObject valueObj2 = new DeferredJavaObject(new Integer("2")); DeferredObject[] args = {valueObj1, valueObj2}; - Text output = (Text) udf.evaluate(args); + DateWritable output = (DateWritable) udf.evaluate(args); assertEquals("date_add() test for STRING failed ", "2009-07-22", output.toString()); @@ -70,7 +70,7 @@ public void testTimestampToDate() throws HiveException { 20, 4, 17, 52, 
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateAdd.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateAdd.java
index 08b9604..6dc4c34 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateAdd.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateAdd.java
@@ -44,7 +44,7 @@ public void testStringToDate() throws HiveException {
     DeferredObject valueObj1 = new DeferredJavaObject(new Text("2009-07-20 04:17:52"));
     DeferredObject valueObj2 = new DeferredJavaObject(new Integer("2"));
     DeferredObject[] args = {valueObj1, valueObj2};
-    Text output = (Text) udf.evaluate(args);
+    DateWritable output = (DateWritable) udf.evaluate(args);
     assertEquals("date_add() test for STRING failed ", "2009-07-22",
        output.toString());

@@ -70,7 +70,7 @@ public void testTimestampToDate() throws HiveException {
        20, 4, 17, 52, 0)));
     DeferredObject valueObj2 = new DeferredJavaObject(new Integer("3"));
     DeferredObject[] args = {valueObj1, valueObj2};
-    Text output = (Text) udf.evaluate(args);
+    DateWritable output = (DateWritable) udf.evaluate(args);
     assertEquals("date_add() test for TIMESTAMP failed ", "2009-07-23",
        output.toString());

@@ -96,7 +96,7 @@ public void testDateWritablepToDate() throws HiveException {
     DeferredObject valueObj1 = new DeferredJavaObject(new DateWritable(new Date(109, 06, 20)));
     DeferredObject valueObj2 = new DeferredJavaObject(new Integer("4"));
     DeferredObject[] args = {valueObj1, valueObj2};
-    Text output = (Text) udf.evaluate(args);
+    DateWritable output = (DateWritable) udf.evaluate(args);
     assertEquals("date_add() test for DATEWRITABLE failed ", "2009-07-24",
        output.toString());

@@ -121,7 +121,7 @@ public void testByteDataTypeAsDays() throws HiveException {
     DeferredObject valueObj1 = new DeferredJavaObject(new DateWritable(new Date(109, 06, 20)));
     DeferredObject valueObj2 = new DeferredJavaObject(new Byte("4"));
     DeferredObject[] args = {valueObj1, valueObj2};
-    Text output = (Text) udf.evaluate(args);
+    DateWritable output = (DateWritable) udf.evaluate(args);
     assertEquals("date_add() test for BYTE failed ", "2009-07-24", output.toString());
   }

@@ -136,7 +136,7 @@ public void testShortDataTypeAsDays() throws HiveException {
     DeferredObject valueObj1 = new DeferredJavaObject(new DateWritable(new Date(109, 06, 20)));
     DeferredObject valueObj2 = new DeferredJavaObject(new Short("4"));
     DeferredObject[] args = {valueObj1, valueObj2};
-    Text output = (Text) udf.evaluate(args);
+    DateWritable output = (DateWritable) udf.evaluate(args);
     assertEquals("date_add() test for SHORT failed ", "2009-07-24", output.toString());
   }
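date_add() likewise returns a DateWritable now. Because a DateWritable is an epoch-day count, day arithmetic reduces to integer addition; the sketch below mirrors, but is not, the UDF's internal implementation:

```java
import java.sql.Date;
import org.apache.hadoop.hive.serde2.io.DateWritable;

public class DateAddSketch {
  // Adding N days is integer addition on the epoch-day value.
  static DateWritable addDays(DateWritable d, int days) {
    return new DateWritable(d.getDays() + days);
  }

  public static void main(String[] args) {
    DateWritable start = new DateWritable(Date.valueOf("2009-07-20"));
    System.out.println(addDays(start, 2)); // 2009-07-22, as the test asserts
  }
}
```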
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateSub.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateSub.java
index ab8d109..a8b4e7f 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateSub.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateSub.java
@@ -44,7 +44,7 @@ public void testStringToDate() throws HiveException {
     DeferredObject valueObj1 = new DeferredJavaObject(new Text("2009-07-20 04:17:52"));
     DeferredObject valueObj2 = new DeferredJavaObject(new Integer("2"));
     DeferredObject[] args = {valueObj1, valueObj2};
-    Text output = (Text) udf.evaluate(args);
+    DateWritable output = (DateWritable) udf.evaluate(args);
     assertEquals("date_sub() test for STRING failed ", "2009-07-18",
        output.toString());

@@ -70,7 +70,7 @@ public void testTimestampToDate() throws HiveException {
        20, 4, 17, 52, 0)));
     DeferredObject valueObj2 = new DeferredJavaObject(new Integer("3"));
     DeferredObject[] args = {valueObj1, valueObj2};
-    Text output = (Text) udf.evaluate(args);
+    DateWritable output = (DateWritable) udf.evaluate(args);
     assertEquals("date_sub() test for TIMESTAMP failed ", "2009-07-17",
        output.toString());

@@ -96,7 +96,7 @@ public void testDateWritablepToDate() throws HiveException {
     DeferredObject valueObj1 = new DeferredJavaObject(new DateWritable(new Date(109, 06, 20)));
     DeferredObject valueObj2 = new DeferredJavaObject(new Integer("4"));
     DeferredObject[] args = {valueObj1, valueObj2};
-    Text output = (Text) udf.evaluate(args);
+    DateWritable output = (DateWritable) udf.evaluate(args);
     assertEquals("date_sub() test for DATEWRITABLE failed ", "2009-07-16",
        output.toString());

@@ -121,7 +121,7 @@ public void testByteDataTypeAsDays() throws HiveException {
     DeferredObject valueObj1 = new DeferredJavaObject(new DateWritable(new Date(109, 06, 20)));
     DeferredObject valueObj2 = new DeferredJavaObject(new Byte("4"));
     DeferredObject[] args = {valueObj1, valueObj2};
-    Text output = (Text) udf.evaluate(args);
+    DateWritable output = (DateWritable) udf.evaluate(args);
     assertEquals("date_add() test for BYTE failed ", "2009-07-16", output.toString());
   }

@@ -136,7 +136,7 @@ public void testShortDataTypeAsDays() throws HiveException {
     DeferredObject valueObj1 = new DeferredJavaObject(new DateWritable(new Date(109, 06, 20)));
     DeferredObject valueObj2 = new DeferredJavaObject(new Short("4"));
     DeferredObject[] args = {valueObj1, valueObj2};
-    Text output = (Text) udf.evaluate(args);
+    DateWritable output = (DateWritable) udf.evaluate(args);
     assertEquals("date_add() test for SHORT failed ", "2009-07-16", output.toString());
   }
diff --git a/ql/src/test/queries/clientpositive/union_offcbo.q b/ql/src/test/queries/clientpositive/union_offcbo.q
index 66d4bee..dca9499 100644
--- a/ql/src/test/queries/clientpositive/union_offcbo.q
+++ b/ql/src/test/queries/clientpositive/union_offcbo.q
@@ -28,7 +28,7 @@ CREATE TABLE ttest2 (

 explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -120,7 +120,7 @@ set hive.optimize.ppd=true;

 explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -212,7 +212,7 @@ set hive.optimize.ppd=true;

 explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -302,7 +302,7 @@ set hive.cbo.enable=false;
 set hive.optimize.ppd=false;
 explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -392,7 +392,7 @@ set hive.cbo.enable=true;
 set hive.optimize.ppd=false;
 explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
diff --git a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
index 13f6ab4..4de8d01 100644
--- a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
+++ b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
@@ -42,7 +42,7 @@ STAGE PLANS:
           alias: lineitem
           Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: (('1996-03-30' = to_date(l_shipdate)) and ('RAIL' = l_shipmode)) (type: boolean)
+            predicate: ((1996-03-30 = to_date(l_shipdate)) and ('RAIL' = l_shipmode)) (type: boolean)
             Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: l_orderkey (type: int), (UDFToDouble(l_partkey) / 1000000.0) (type: double)
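The union_offcbo.q edits add an explicit cast because date_add() now returns DATE while the other CASE branch is a STRING literal, so the branches no longer share a type. A minimal Java analogue of the coercion the cast restores; all names here are illustrative only:

```java
import java.sql.Date;

public class CaseBranchTypes {
  public static void main(String[] args) {
    boolean flagIsIns = true;                // stands in for FLAG = 'A_INS'
    Date added = Date.valueOf("2015-11-21"); // stands in for date_add('2015-11-20', 1)

    // Like the SQL CASE, a conditional expression needs both branches to
    // agree on a type; converting the DATE branch to a string mirrors
    // cast(date_add(...) as string).
    String dt1 = flagIsIns ? added.toString() : "2015-11-20";
    System.out.println(dt1); // 2015-11-21
  }
}
```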
diff --git a/ql/src/test/results/clientpositive/udf5.q.out b/ql/src/test/results/clientpositive/udf5.q.out
index 860ebcb..3fa801a 100644
--- a/ql/src/test/results/clientpositive/udf5.q.out
+++ b/ql/src/test/results/clientpositive/udf5.q.out
@@ -33,9 +33,9 @@ STAGE PLANS:
           alias: dest1
           Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
-            expressions: '2008-11-11 15:32:20' (type: string), '2008-11-11' (type: string), 1 (type: int), 11 (type: int), 2008 (type: int), 1 (type: int), 11 (type: int), 2008 (type: int)
+            expressions: '2008-11-11 15:32:20' (type: string), 2008-11-11 (type: date), 1 (type: int), 11 (type: int), 2008 (type: int), 1 (type: int), 11 (type: int), 2008 (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-            Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 183 Basic stats: COMPLETE Column stats: COMPLETE
             ListSink

 PREHOOK: query: SELECT from_unixtime(1226446340), to_date(from_unixtime(1226446340)), day('2008-11-01'), month('2008-11-01'), year('2008-11-01'), day('2008-11-01 15:32:20'), month('2008-11-01 15:32:20'), year('2008-11-01 15:32:20') FROM dest1
diff --git a/ql/src/test/results/clientpositive/udf9.q.out b/ql/src/test/results/clientpositive/udf9.q.out
index ddd4a5d..a9bd954 100644
--- a/ql/src/test/results/clientpositive/udf9.q.out
+++ b/ql/src/test/results/clientpositive/udf9.q.out
@@ -35,7 +35,7 @@ STAGE PLANS:
             predicate: (UDFToDouble(key) = 86.0) (type: boolean)
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: -1 (type: int), 2 (type: int), 32 (type: int), -1 (type: int), '2009-01-01' (type: string), '2009-12-31' (type: string), '2008-03-01' (type: string), '2009-03-02' (type: string), '2008-02-28' (type: string), '2009-02-27' (type: string), '2008-12-31' (type: string), '2008-01-02' (type: string), '2008-02-26' (type: string), '2009-02-26' (type: string), '2006-02-28' (type: string), '2005-02-28' (type: string)
+              expressions: -1 (type: int), 2 (type: int), 32 (type: int), -1 (type: int), 2009-01-01 (type: date), 2009-12-31 (type: date), 2008-03-01 (type: date), 2009-03-02 (type: date), 2008-02-28 (type: date), 2009-02-27 (type: date), 2008-12-31 (type: date), 2008-01-02 (type: date), 2008-02-26 (type: date), 2009-02-26 (type: date), 2006-02-28 (type: date), 2005-02-28 (type: date)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
diff --git a/ql/src/test/results/clientpositive/union_offcbo.q.out b/ql/src/test/results/clientpositive/union_offcbo.q.out
index 71c3bfc..38aaaa5 100644
--- a/ql/src/test/results/clientpositive/union_offcbo.q.out
+++ b/ql/src/test/results/clientpositive/union_offcbo.q.out
@@ -60,7 +60,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@ttest2
 PREHOOK: query: explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -148,7 +148,7 @@ PREHOOK: query: explain SELECT
 PREHOOK: type: QUERY
 POSTHOOK: query: explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -403,7 +403,7 @@ STAGE PLANS:

 PREHOOK: query: explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -491,7 +491,7 @@ PREHOOK: query: explain SELECT
 PREHOOK: type: QUERY
 POSTHOOK: query: explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -740,7 +740,7 @@ STAGE PLANS:

 PREHOOK: query: explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -828,7 +828,7 @@ PREHOOK: query: explain SELECT
 PREHOOK: type: QUERY
 POSTHOOK: query: explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -1083,7 +1083,7 @@ STAGE PLANS:

 PREHOOK: query: explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -1171,7 +1171,7 @@ PREHOOK: query: explain SELECT
 PREHOOK: type: QUERY
 POSTHOOK: query: explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -1426,7 +1426,7 @@ STAGE PLANS:

 PREHOOK: query: explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
@@ -1514,7 +1514,7 @@ PREHOOK: query: explain SELECT
 PREHOOK: type: QUERY
 POSTHOOK: query: explain SELECT
 A2.id1, A2.sts,A2.at1,
-CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+CASE WHEN FLAG = 'A_INS' THEN cast(date_add('2015-11-20', 1) as string) ELSE '2015-11-20' END dt1
 ,A2.dt2
 ,A2.khash
 ,A2.rhash
diff --git a/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out b/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
index b8023e2..299d3bc 100644
--- a/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
@@ -243,7 +243,7 @@ STAGE PLANS:
                 alias: date_udf_flight_orc
                 Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: string), date_add(fl_time, 2) (type: string), date_sub(fl_time, 2) (type: string), datediff(fl_time, '2000-01-01') (type: int)
+                  expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                   Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -470,7 +470,7 @@ STAGE PLANS:
                 alias: date_udf_flight_orc
                 Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: string), date_add(fl_date, 2) (type: string), date_sub(fl_date, 2) (type: string), datediff(fl_date, '2000-01-01') (type: int)
+                  expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                   Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -912,7 +912,7 @@ STAGE PLANS:
                 alias: date_udf_flight_orc
                 Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: string), to_date(date_sub(fl_date, 2)) (type: string), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int)
+                  expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                   Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
                   Limit
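Taken together, the plan changes show to_date()/date_add()/date_sub() producing DATE columns end to end. A minimal sketch of the string-to-epoch-days conversion chain, built only from classes this patch adds or touches (DateParser, DateWritable); the surrounding class and values are illustrative:

```java
import java.sql.Date;

import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hive.common.util.DateParser;

public class StringToDateChain {
  public static void main(String[] args) {
    DateParser parser = new DateParser();
    Date scratch = new Date(0);

    // Parse into a reusable scratch Date, then store the value as epoch
    // days, the representation the date columns above carry.
    if (parser.parseDate("2015-11-20", scratch)) {
      System.out.println("epoch days: " + DateWritable.dateToDays(scratch));
    } else {
      System.out.println("unparseable input; the vectorized path marks the row null");
    }
  }
}
```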