diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 724ea45..e142d2c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -24,6 +24,8 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hive.common.util.DateParser; +import com.google.common.base.Preconditions; + import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; @@ -40,6 +42,7 @@ protected boolean isPositive = true; private transient final DateParser dateParser = new DateParser(); private transient final Date baseDate = new Date(0); + private boolean isNull = false; public VectorUDFDateAddScalarCol() { super(); @@ -51,11 +54,16 @@ public VectorUDFDateAddScalarCol(Object object, int colNum, int outputColumn) { this.outputColumn = outputColumn; if (object instanceof Long) { - this.longValue = (Long) object; + setLongValue((Long) object); + } else if (object instanceof Integer) { + setLongValue((Integer) object); } else if (object instanceof Timestamp) { - this.timestampValue = (Timestamp) object; + setTimeStampValue((Timestamp) object); } else if (object instanceof byte []) { - this.stringValue = (byte[]) object; + setStringValue((byte []) object); + } else { + assert object == null; + isNull = true; } } @@ -75,31 +83,10 @@ public void evaluate(VectorizedRowBatch batch) { switch (inputTypes[0]) { case DATE: - baseDate.setTime(DateWritable.daysToMillis((int) longValue)); - break; - case TIMESTAMP: - baseDate.setTime(timestampValue.getTime()); - break; - case STRING: case CHAR: case VARCHAR: - boolean parsed = dateParser.parseDate(new String(stringValue, StandardCharsets.UTF_8), baseDate); - if (!parsed) { - outV.noNulls = false; - if (selectedInUse) { - 
for(int j=0; j < n; j++) { - int i = sel[j]; - outV.isNull[i] = true; - } - } else { - for(int i = 0; i < n; i++) { - outV.isNull[i] = true; - } - } - return; - } break; default: throw new Error("Unsupported input type " + inputTypes[0].name()); @@ -113,6 +100,21 @@ public void evaluate(VectorizedRowBatch batch) { /* true for all algebraic UDFs with no state */ outV.isRepeating = inputCol.isRepeating; + if (isNull) { + outV.noNulls = false; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = true; + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = true; + } + } + return; + } + long baseDateDays = DateWritable.millisToDays(baseDate.getTime()); if (inputCol.noNulls) { outV.noNulls = true; @@ -187,6 +189,8 @@ public long getLongValue() { public void setLongValue(long longValue) { this.longValue = longValue; + Preconditions.checkArgument(longValue >= Integer.MIN_VALUE && longValue <= Integer.MAX_VALUE); + baseDate.setTime(DateWritable.daysToMillis((int) longValue)); } public byte[] getStringValue() { @@ -195,6 +199,19 @@ public void setLongValue(long longValue) { public void setStringValue(byte[] stringValue) { this.stringValue = stringValue; + boolean parsed = dateParser.parseDate(new String(stringValue, StandardCharsets.UTF_8), baseDate); + if (!parsed) { + isNull = true; + } + } + + public Timestamp getTimestampValue() { + return this.timestampValue; + } + + public void setTimeStampValue(Timestamp timestampValue) { + this.timestampValue = timestampValue; + baseDate.setTime(timestampValue.getTime()); } public boolean isPositive() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java index 71b3887..acde309 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java 
@@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -28,6 +27,8 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.io.Text; +import com.google.common.base.Preconditions; + import java.sql.Date; import java.sql.Timestamp; import java.text.ParseException; @@ -44,6 +45,7 @@ private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient final Text text = new Text(); private int baseDate; + private boolean isNull = false; private transient Date date = new Date(0); public VectorUDFDateDiffColScalar(int colNum, Object object, int outputColumn) { @@ -52,11 +54,16 @@ public VectorUDFDateDiffColScalar(int colNum, Object object, int outputColumn) { this.outputColumn = outputColumn; if (object instanceof Long) { - this.longValue = (Long) object; + setLongValue((Long) object); + } else if (object instanceof Integer) { + setLongValue((Integer) object); } else if (object instanceof Timestamp) { - this.timestampValue = (Timestamp) object; + setTimeStampValue((Timestamp) object); } else if (object instanceof byte []) { - this.stringValue = (byte []) object; + setStringValue((byte []) object); + } else { + assert object == null; + isNull = true; } } @@ -71,6 +78,17 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } + switch (inputTypes[1]) { + case DATE: + case TIMESTAMP: + case STRING: + case CHAR: + case VARCHAR: + break; + default: + throw new Error("Invalid input type #1: " + inputTypes[1].name()); + } + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; ColumnVector inputCol = batch.cols[this.colNum]; /* every line below this is identical for evaluateLong & evaluateString */ @@ 
-86,39 +104,20 @@ public void evaluate(VectorizedRowBatch batch) { /* true for all algebraic UDFs with no state */ outV.isRepeating = inputCol.isRepeating; - switch (inputTypes[1]) { - case DATE: - baseDate = (int) longValue; - break; - - case TIMESTAMP: - date.setTime(timestampValue.getTime()); - baseDate = DateWritable.dateToDays(date); - break; - - case STRING: - case CHAR: - case VARCHAR: - try { - date.setTime(formatter.parse(new String(stringValue, "UTF-8")).getTime()); - baseDate = DateWritable.dateToDays(date); - break; - } catch (Exception e) { - outV.noNulls = false; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.isNull[i] = true; - } - } else { - for(int i = 0; i < n; i++) { - outV.isNull[i] = true; - } - } - return; + if (isNull) { + // rhs is null + outV.noNulls = false; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = true; } - default: - throw new Error("Invalid input type #1: " + inputTypes[1].name()); + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = true; + } + } + return; } switch (inputTypes[0]) { @@ -284,8 +283,10 @@ public long getLongValue() { return longValue; } - public void setLongValue(int longValue) { + public void setLongValue(long longValue) { this.longValue = longValue; + Preconditions.checkArgument(longValue >= Integer.MIN_VALUE && longValue <= Integer.MAX_VALUE); + baseDate = (int) longValue; } public byte[] getStringValue() { @@ -294,8 +295,25 @@ public void setLongValue(int longValue) { public void setStringValue(byte[] stringValue) { this.stringValue = stringValue; + try { + date.setTime(formatter.parse(new String(stringValue, "UTF-8")).getTime()); + baseDate = DateWritable.dateToDays(date); + } catch (Exception e) { + isNull = true; + } + } + + public Timestamp getTimestampValue() { + return this.timestampValue; + } + + public void setTimeStampValue(Timestamp timestampValue) { + this.timestampValue = timestampValue; + date.setTime(timestampValue.getTime()); + baseDate = 
DateWritable.dateToDays(date); } + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java index c733bc9..8c41852 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java @@ -27,6 +27,8 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.io.Text; +import com.google.common.base.Preconditions; + import java.sql.Date; import java.sql.Timestamp; import java.text.ParseException; @@ -43,6 +45,7 @@ private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient final Text text = new Text(); private int baseDate; + private boolean isNull = false; private transient Date date = new Date(0); public VectorUDFDateDiffScalarCol(Object object, int colNum, int outputColumn) { @@ -51,11 +54,16 @@ public VectorUDFDateDiffScalarCol(Object object, int colNum, int outputColumn) { this.outputColumn = outputColumn; if (object instanceof Long) { - this.longValue = (Long) object; + setLongValue((Long) object); + } else if (object instanceof Integer) { + setLongValue((Integer) object); } else if (object instanceof Timestamp) { - this.timestampValue = (Timestamp) object; + setTimeStampValue((Timestamp) object); } else if (object instanceof byte []) { - this.stringValue = (byte[]) object; + setStringValue((byte []) object); + } else { + assert object == null; + isNull = true; } } @@ -70,6 +78,17 @@ public void evaluate(VectorizedRowBatch batch) { super.evaluateChildren(batch); } + switch (inputTypes[0]) { + case DATE: + case TIMESTAMP: + case STRING: + case CHAR: + case VARCHAR: + break; + 
default: + throw new Error("Unsupported input type " + inputTypes[0].name()); + } + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; ColumnVector inputCol = batch.cols[this.colNum]; /* every line below this is identical for evaluateLong & evaluateString */ @@ -85,39 +104,20 @@ public void evaluate(VectorizedRowBatch batch) { /* true for all algebraic UDFs with no state */ outV.isRepeating = inputCol.isRepeating; - switch (inputTypes[0]) { - case DATE: - baseDate = (int) longValue; - break; - - case TIMESTAMP: - date.setTime(timestampValue.getTime()); - baseDate = DateWritable.dateToDays(date); - break; - - case STRING: - case CHAR: - case VARCHAR: - try { - date.setTime(formatter.parse(new String(stringValue, "UTF-8")).getTime()); - baseDate = DateWritable.dateToDays(date); - break; - } catch (Exception e) { - outV.noNulls = false; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.isNull[i] = true; - } - } else { - for(int i = 0; i < n; i++) { - outV.isNull[i] = true; - } - } - return; + if (isNull) { + // lhs scalar is null or could not be parsed + outV.noNulls = false; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = true; } - default: - throw new Error("Unsupported input type " + inputTypes[0].name()); + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = true; + } + } + return; } switch (inputTypes[1]) { @@ -257,6 +257,7 @@ protected void evaluateString(ColumnVector columnVector, LongColumnVector output output.isNull[i] = true; } } + @Override public int getOutputColumn() { return this.outputColumn; @@ -283,8 +284,10 @@ public long getLongValue() { return longValue; } - public void setLongValue(int longValue) { + public void setLongValue(long longValue) { this.longValue = longValue; + Preconditions.checkArgument(longValue >= Integer.MIN_VALUE && longValue <= Integer.MAX_VALUE); + baseDate = (int) longValue; } public byte[] getStringValue() { @@ -293,6 +296,22 @@ public void setLongValue(int longValue) { public void 
setStringValue(byte[] stringValue) { this.stringValue = stringValue; + try { + date.setTime(formatter.parse(new String(stringValue, "UTF-8")).getTime()); + baseDate = DateWritable.dateToDays(date); + } catch (Exception e) { + isNull = true; + } + } + + public Timestamp getTimestampValue() { + return this.timestampValue; + } + + public void setTimeStampValue(Timestamp timestampValue) { + this.timestampValue = timestampValue; + date.setTime(timestampValue.getTime()); + baseDate = DateWritable.dateToDays(date); } @Override diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java index 13bfdd7..6b2cc19 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java @@ -512,7 +512,7 @@ public void testDateDiffColScalar() { VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); udf = new VectorUDFDateDiffColScalar(0, 0, 1); - udf.setInputTypes(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING); + udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); batch.cols[0] = new BytesColumnVector(1); batch.cols[1] = new LongColumnVector(1);