diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 842994e..b18ac2e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -727,11 +727,13 @@ private VectorExpression getVectorExpressionForUdf(Class udf, List
       vectorClass, List<ExprNodeDesc> childExpr, Mode childrenMode, TypeInfo returnType) throws HiveException {
     int numChildren = childExpr == null ? 0: childExpr.size();
+    VectorExpression.Type [] inputTypes = new VectorExpression.Type[numChildren];
     List<VectorExpression> children = new ArrayList<VectorExpression>();
     Object[] arguments = new Object[numChildren];
     try {
       for (int i = 0; i < numChildren; i++) {
         ExprNodeDesc child = childExpr.get(i);
+        inputTypes[i] = VectorExpression.Type.getValue(child.getTypeInfo().getTypeName());
         if (child instanceof ExprNodeGenericFuncDesc) {
           VectorExpression vChild = getVectorExpression(child, childrenMode);
           children.add(vChild);
@@ -751,6 +753,7 @@ private VectorExpression createVectorExpression(Class vectorClass,
       }
     }
     VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, arguments);
+    if (vectorExpression != null) {
+      vectorExpression.setInputTypes(inputTypes);
+    }
     if ((vectorExpression != null) && !children.isEmpty()) {
       vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
     }
@@ -1234,7 +1237,7 @@ public static boolean isStringFamily(String resultType) {
   }
 
   public static boolean isDatetimeFamily(String resultType) {
-    return resultType.equalsIgnoreCase("timestamp");
+    return resultType.equalsIgnoreCase("timestamp") || resultType.equalsIgnoreCase("date");
   }
 
   // return true if this is any kind of float
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index 80bf671..24dc308 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -23,6 +23,7 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -147,6 +148,17 @@ public static void AddRowToBatch(Object row, StructObjectInspector oi, int rowIn
         }
       }
         break;
+      case DATE: {
+        LongColumnVector lcv = (LongColumnVector) batch.cols[i];
+        if (writableCol != null) {
+          lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
+          lcv.isNull[rowIndex] = false;
+        } else {
+          lcv.vector[rowIndex] = 1;
+          SetNullColIsNullValue(lcv, rowIndex);
+        }
+      }
+        break;
       case FLOAT: {
         DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[i];
         if (writableCol != null) {
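Note: throughout this patch, a DATE column rides in a LongColumnVector as days since the Unix epoch (hence the DateWritable.getDays()/daysToMillis() conversions above). A minimal standalone sketch, not part of the patch, illustrating the round trip:

```java
import java.sql.Date;

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.serde2.io.DateWritable;

public class DateColumnSketch {
  public static void main(String[] args) {
    // A date column is just a long column holding days since 1970-01-01.
    LongColumnVector lcv = new LongColumnVector();
    lcv.vector[0] = new DateWritable(Date.valueOf("2014-01-06")).getDays(); // 16076

    // Reading it back: days -> millis (local midnight) -> java.sql.Date.
    Date roundTrip = new Date(DateWritable.daysToMillis((int) lcv.vector[0]));
    System.out.println(lcv.vector[0] + " days = " + roundTrip); // 16076 days = 2014-01-06
  }
}
```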
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
index 69553d9..0fd4983 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
@@ -29,7 +29,9 @@
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
 import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.lazy.LazyDate;
 import org.apache.hadoop.hive.serde2.lazy.LazyLong;
 import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp;
 import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
@@ -166,6 +168,11 @@ public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspe
                 tw.set(t);
                 LazyTimestamp.writeUTF8(serializeVectorStream, tw);
                 break;
+              case DATE:
+                LongColumnVector dacv = (LongColumnVector) batch.cols[k];
+                DateWritable daw = new DateWritable((int) dacv.vector[rowIndex]);
+                LazyDate.writeUTF8(serializeVectorStream, daw);
+                break;
               default:
                 throw new UnsupportedOperationException(
                     "Vectorization is not supported for datatype:"
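Note: LazyDate.writeUTF8 emits the date's text form, so a date column serializes as the familiar yyyy-MM-dd string. A minimal sketch, assuming ByteStream.Output's getData()/getCount() accessors:

```java
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.lazy.LazyDate;

public class LazyDateSketch {
  public static void main(String[] args) throws Exception {
    ByteStream.Output out = new ByteStream.Output();
    LazyDate.writeUTF8(out, new DateWritable(16076)); // 16076 days since epoch
    // The lazy text form is the yyyy-MM-dd rendering of the day count.
    System.out.println(new String(out.getData(), 0, out.getCount(), "UTF-8")); // 2014-01-06
  }
}
```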
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
new file mode 100644
index 0000000..09b494f
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+
+import java.sql.Date;
+
+/**
+ * Casts a timestamp or date vector to a date vector.
+ */
+public class CastLongToDate extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int inputColumn;
+  private int outputColumn;
+  private transient Date date = new Date(0);
+
+  public CastLongToDate() {
+    super();
+  }
+
+  public CastLongToDate(int inputColumn, int outputColumn) {
+    this.inputColumn = inputColumn;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector inV = (LongColumnVector) batch.cols[inputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+
+    if (n == 0) {
+
+      // Nothing to do
+      return;
+    }
+
+    switch (inputTypes[0]) {
+      case TIMESTAMP:
+        if (inV.noNulls) {
+          outV.noNulls = true;
+          if (inV.isRepeating) {
+            outV.isRepeating = true;
+            date.setTime(inV.vector[0] / 1000000);
+            outV.vector[0] = DateWritable.dateToDays(date);
+          } else if (batch.selectedInUse) {
+            for(int j = 0; j != n; j++) {
+              int i = sel[j];
+              date.setTime(inV.vector[i] / 1000000);
+              outV.vector[i] = DateWritable.dateToDays(date);
+            }
+            outV.isRepeating = false;
+          } else {
+            for(int i = 0; i != n; i++) {
+              date.setTime(inV.vector[i] / 1000000);
+              outV.vector[i] = DateWritable.dateToDays(date);
+            }
+            outV.isRepeating = false;
+          }
+        } else {
+
+          // Handle case with nulls. Don't do function if the value is null,
+          // because the data may be undefined for a null value.
+          outV.noNulls = false;
+          if (inV.isRepeating) {
+            outV.isRepeating = true;
+            outV.isNull[0] = inV.isNull[0];
+            if (!inV.isNull[0]) {
+              date.setTime(inV.vector[0] / 1000000);
+              outV.vector[0] = DateWritable.dateToDays(date);
+            }
+          } else if (batch.selectedInUse) {
+            for(int j = 0; j != n; j++) {
+              int i = sel[j];
+              outV.isNull[i] = inV.isNull[i];
+              if (!inV.isNull[i]) {
+                date.setTime(inV.vector[i] / 1000000);
+                outV.vector[i] = DateWritable.dateToDays(date);
+              }
+            }
+            outV.isRepeating = false;
+          } else {
+            System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+            for(int i = 0; i != n; i++) {
+              if (!inV.isNull[i]) {
+                date.setTime(inV.vector[i] / 1000000);
+                outV.vector[i] = DateWritable.dateToDays(date);
+              }
+            }
+            outV.isRepeating = false;
+          }
+        }
+        break;
+
+      case DATE:
+        inV.copySelected(batch.selectedInUse, batch.selected, batch.size, outV);
+        break;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public int getInputColumn() {
+    return inputColumn;
+  }
+
+  public void setInputColumn(int inputColumn) {
+    this.inputColumn = inputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "date";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(1)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.LONG)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}
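Note: vectorized timestamps are longs holding nanoseconds since the epoch, which is why the cast divides by 1,000,000 to get milliseconds before truncating to days. A small sketch of the same arithmetic, not part of the patch:

```java
import java.sql.Date;

import org.apache.hadoop.hive.serde2.io.DateWritable;

public class TimestampToDateSketch {
  public static void main(String[] args) {
    // 2014-01-06 00:00:00 UTC expressed in nanoseconds since the epoch.
    long timestampNanos = 1388966400000L * 1000000L;
    Date d = new Date(timestampNanos / 1000000); // nanos -> millis
    // Truncates to whole days; result depends on the JVM timezone,
    // 16076 in a UTC-like zone.
    System.out.println(DateWritable.dateToDays(d));
  }
}
```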
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
new file mode 100644
index 0000000..1d07615
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+
+import java.util.Date;
+import java.text.SimpleDateFormat;
+
+/**
+ * Casts a string vector to a date vector.
+ */
+public class CastStringToDate extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int inputColumn;
+  private int outputColumn;
+  private transient java.sql.Date sqlDate = new java.sql.Date(0);
+  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+
+  public CastStringToDate() {
+
+  }
+
+  public CastStringToDate(int inputColumn, int outputColumn) {
+    this.inputColumn = inputColumn;
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+
+    if (n == 0) {
+
+      // Nothing to do
+      return;
+    }
+
+    if (inV.noNulls) {
+      outV.noNulls = true;
+      if (inV.isRepeating) {
+        outV.isRepeating = true;
+        evaluate(outV, inV, 0);
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          evaluate(outV, inV, i);
+        }
+        outV.isRepeating = false;
+      } else {
+        for(int i = 0; i != n; i++) {
+          evaluate(outV, inV, i);
+        }
+        outV.isRepeating = false;
+      }
+    } else {
+
+      // Handle case with nulls. Don't do function if the value is null,
+      // because the data may be undefined for a null value.
+      outV.noNulls = false;
+      if (inV.isRepeating) {
+        outV.isRepeating = true;
+        outV.isNull[0] = inV.isNull[0];
+        if (!inV.isNull[0]) {
+          evaluate(outV, inV, 0);
+        }
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outV.isNull[i] = inV.isNull[i];
+          if (!inV.isNull[i]) {
+            evaluate(outV, inV, i);
+          }
+        }
+        outV.isRepeating = false;
+      } else {
+        System.arraycopy(inV.isNull, 0, outV.isNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!inV.isNull[i]) {
+            evaluate(outV, inV, i);
+          }
+        }
+        outV.isRepeating = false;
+      }
+    }
+  }
+
+  private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) {
+    try {
+      Date utilDate = formatter.parse(new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8"));
+      sqlDate.setTime(utilDate.getTime());
+      outV.vector[i] = DateWritable.dateToDays(sqlDate);
+    } catch (Exception e) {
+      outV.vector[i] = 1;
+      outV.isNull[i] = true;
+      outV.noNulls = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public int getInputColumn() {
+    return inputColumn;
+  }
+
+  public void setInputColumn(int inputColumn) {
+    this.inputColumn = inputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "date";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(1)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.STRING)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}
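Note: the per-row parse above uses a SimpleDateFormat that is lenient by default, so malformed-but-resolvable strings roll over to a different date instead of producing a null. A pure-JDK sketch of the distinction (editorial note, not part of the patch):

```java
import java.text.ParseException;
import java.text.SimpleDateFormat;

public class LenientParseSketch {
  public static void main(String[] args) throws ParseException {
    SimpleDateFormat f = new SimpleDateFormat("yyyy-MM-dd");
    // Lenient by default: month 13 silently rolls over instead of failing.
    System.out.println(f.format(f.parse("2013-13-01"))); // 2014-01-01

    f.setLenient(false);
    try {
      f.parse("2013-13-01"); // now rejected
    } catch (ParseException e) {
      System.out.println("rejected: " + e.getMessage());
    }
  }
}
```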
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
index 6dac109..726413c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
@@ -19,7 +19,9 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
 import java.io.Serializable;
+import java.util.Map;
 
+import com.google.common.collect.ImmutableMap;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 
@@ -27,6 +29,21 @@
  * Base class for expressions.
  */
 public abstract class VectorExpression implements Serializable {
+  public enum Type {
+    STRING, TIMESTAMP, DATE, OTHER;
+
+    private static Map<String, Type> types = ImmutableMap.<String, Type>builder()
+        .put("string", STRING)
+        .put("timestamp", TIMESTAMP)
+        .put("date", DATE)
+        .build();
+
+    public static Type getValue(String name) {
+      if (types.containsKey(name.toLowerCase())) {
+        return types.get(name.toLowerCase());
+      }
+      return OTHER;
+    }
+  }
   private static final long serialVersionUID = 1L;
 
   /**
@@ -35,6 +52,11 @@
   protected VectorExpression [] childExpressions = null;
 
   /**
+   * More detailed input types, such as date and timestamp.
+   */
+  protected Type [] inputTypes;
+
+  /**
    * Output type of the expression.
    */
   protected String outputType;
@@ -70,6 +92,7 @@ public void setOutputType(String type) {
   /**
    * Initialize the child expressions.
    */
   public void setChildExpressions(VectorExpression [] ve) {
+    childExpressions = ve;
   }
@@ -91,6 +114,21 @@ final protected void evaluateChildren(VectorizedRowBatch vrg) {
     }
   }
 
+  /**
+   * Set more detailed types to distinguish certain types that are represented by the same
+   * {@link org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType}. For example, date and
+   * timestamp are both carried in a {@link org.apache.hadoop.hive.ql.exec.vector.LongColumnVector}, but they need
+   * to be distinguished.
+   * @param inputTypes detailed types of the child expressions, in child order
+   */
+  public void setInputTypes(Type ... inputTypes) {
+    this.inputTypes = inputTypes;
+  }
+
+  public Type [] getInputTypes() {
+    return inputTypes;
+  }
+
   @Override
   public String toString() {
     StringBuilder b = new StringBuilder();
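Note: Type.getValue is the hook VectorizationContext uses to record each child's detailed type (the lookup must use the lowercased key on both the containsKey and get calls, as fixed above). A minimal usage sketch:

```java
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;

public class TypeLookupSketch {
  public static void main(String[] args) {
    // Known names map to their enum constant, case-insensitively...
    System.out.println(VectorExpression.Type.getValue("Date"));      // DATE
    System.out.println(VectorExpression.Type.getValue("timestamp")); // TIMESTAMP
    // ...and anything else (int, double, decimal, ...) falls back to OTHER.
    System.out.println(VectorExpression.Type.getValue("int"));       // OTHER
  }
}
```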
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
new file mode 100644
index 0000000..fa1dfcf
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java
@@ -0,0 +1,238 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.Text;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Date;
+
+public class VectorUDFDateAddColCol extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+  protected boolean isPositive = true;
+  private transient final Calendar calendar = Calendar.getInstance();
+  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient final Text text = new Text();
+
+  public VectorUDFDateAddColCol(int colNum1, int colNum2, int outputColumn) {
+    this();
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  public VectorUDFDateAddColCol() {
+    super();
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    ColumnVector inputColVector1 = batch.cols[colNum1];
+    LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    long[] vector2 = inputColVector2.vector;
+
+    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
+    byte[][] outputVector = outV.vector;
+    if (n <= 0) {
+      // Nothing to do
+      return;
+    }
+
+    // Handle null
+    NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse);
+
+    switch (inputTypes[0]) {
+      case DATE:
+        // Now disregard null in second pass.
+        if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+          // All must be selected otherwise size would be zero
+          // Repeating property will not change.
+          outV.isRepeating = true;
+          outputVector[0] = evaluateDate(inputColVector1, 0, vector2[0]);
+          outV.start[0] = 0;
+          outV.length[0] = outputVector[0].length;
+        } else if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = evaluateDate(inputColVector1, i, vector2[i]);
+            outV.start[i] = 0;
+            outV.length[i] = outputVector[i].length;
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = evaluateDate(inputColVector1, i, vector2[i]);
+            outV.start[i] = 0;
+            outV.length[i] = outputVector[i].length;
+          }
+        }
+        break;
+
+      case TIMESTAMP:
+        // Now disregard null in second pass.
+        if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+          // All must be selected otherwise size would be zero
+          // Repeating property will not change.
+          outV.isRepeating = true;
+          outputVector[0] = evaluateTimestamp(inputColVector1, 0, vector2[0]);
+          outV.start[0] = 0;
+          outV.length[0] = outputVector[0].length;
+        } else if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            outputVector[i] = evaluateTimestamp(inputColVector1, i, vector2[i]);
+            outV.start[i] = 0;
+            outV.length[i] = outputVector[i].length;
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            outputVector[i] = evaluateTimestamp(inputColVector1, i, vector2[i]);
+            outV.start[i] = 0;
+            outV.length[i] = outputVector[i].length;
+          }
+        }
+        break;
+
+      case STRING:
+        // Now disregard null in second pass.
+        if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+          // All must be selected otherwise size would be zero
+          // Repeating property will not change.
+          outV.isRepeating = true;
+          evaluateString((BytesColumnVector) inputColVector1, inputColVector2, outV, 0);
+        } else if (batch.selectedInUse) {
+          for (int j = 0; j != n; j++) {
+            int i = sel[j];
+            evaluateString((BytesColumnVector) inputColVector1, inputColVector2, outV, i);
+          }
+        } else {
+          for (int i = 0; i != n; i++) {
+            evaluateString((BytesColumnVector) inputColVector1, inputColVector2, outV, i);
+          }
+        }
+        break;
+    }
+  }
+
+  protected byte[] evaluateDate(ColumnVector columnVector, int index, long numDays) {
+    LongColumnVector lcv = (LongColumnVector) columnVector;
+    if (isPositive) {
+      calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] + (int) numDays));
+    } else {
+      calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] - (int) numDays));
+    }
+    Date newDate = calendar.getTime();
+    text.set(formatter.format(newDate));
+    return Arrays.copyOf(text.getBytes(), text.getLength());
+  }
+
+  protected byte[] evaluateTimestamp(ColumnVector columnVector, int index, long numDays) {
+    LongColumnVector lcv = (LongColumnVector) columnVector;
+    calendar.setTimeInMillis(lcv.vector[index] / 1000000);
+    if (isPositive) {
+      calendar.add(Calendar.DATE, (int) numDays);
+    } else {
+      calendar.add(Calendar.DATE, (int) -numDays);
+    }
+    Date newDate = calendar.getTime();
+    text.set(formatter.format(newDate));
+    return Arrays.copyOf(text.getBytes(), text.getLength());
+  }
+
+  protected void evaluateString(BytesColumnVector inputColumnVector1, LongColumnVector inputColumnVector2,
+      BytesColumnVector outputVector, int i) {
+    if (inputColumnVector1.isNull[i] || inputColumnVector2.isNull[i]) {
+      outputVector.isNull[i] = true;
+    } else {
+      text.set(inputColumnVector1.vector[i], inputColumnVector1.start[i], inputColumnVector1.length[i]);
+      try {
+        calendar.setTime(formatter.parse(text.toString()));
+      } catch (ParseException e) {
+        // Unparseable input: leave this row null instead of reusing a stale calendar value.
+        outputVector.isNull[i] = true;
+        outputVector.noNulls = false;
+        return;
+      }
+      if (isPositive) {
+        calendar.add(Calendar.DATE, (int) inputColumnVector2.vector[i]);
+      } else {
+        calendar.add(Calendar.DATE, -(int) inputColumnVector2.vector[i]);
+      }
+      Date newDate = calendar.getTime();
+      text.set(formatter.format(newDate));
+
+      outputVector.vector[i] = Arrays.copyOf(text.getBytes(), text.getLength());
+      outputVector.start[i] = 0;
+      outputVector.length[i] = text.getLength();
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return this.outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "string";
+  }
+
+  public int getColNum1() {
+    return colNum1;
+  }
+
+  public void setColNum1(int colNum1) {
+    this.colNum1 = colNum1;
+  }
+
+  public int getColNum2() {
+    return colNum2;
+  }
+
+  public void setColNum2(int colNum2) {
+    this.colNum2 = colNum2;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.ANY,
+            VectorExpressionDescriptor.ArgumentType.LONG)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}
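Note: the "disregard null in second pass" pattern above relies on NullUtil.propagateNullsColCol ORing the input null masks into the output first; values later written into null rows are garbage by design, because readers must consult isNull[] before vector[]. A schematic sketch of that contract, assuming propagateNullsColCol only needs sel when selectedInUse is true:

```java
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;

public class TwoPassNullSketch {
  public static void main(String[] args) {
    int n = 3;
    int[] sel = new int[n];
    LongColumnVector a = new LongColumnVector();
    LongColumnVector b = new LongColumnVector();
    LongColumnVector out = new LongColumnVector();
    a.noNulls = false;
    a.isNull[1] = true; // row 1 of the first input is null

    // Pass 1: propagate the combined null mask into the output.
    NullUtil.propagateNullsColCol(a, b, out, sel, n, false);

    // Pass 2: compute every row unconditionally; null rows hold ignored values.
    for (int i = 0; i < n; i++) {
      out.vector[i] = a.vector[i] + b.vector[i];
    }
    System.out.println(out.isNull[1]); // true, so out.vector[1] is never read
  }
}
```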
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
new file mode 100644
index 0000000..6578907
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java
@@ -0,0 +1,291 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.Text;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Date;
+
+public class VectorUDFDateAddColScalar extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private int outputColumn;
+  private int numDays;
+  protected boolean isPositive = true;
+  private transient final Calendar calendar = Calendar.getInstance();
+  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient final Text text = new Text();
+
+  public VectorUDFDateAddColScalar(int colNum, long numDays, int outputColumn) {
+    super();
+    this.colNum = colNum;
+    this.numDays = (int) numDays;
+    this.outputColumn = outputColumn;
+  }
+
+  public VectorUDFDateAddColScalar() {
+    super();
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
+    ColumnVector inputCol = batch.cols[this.colNum];
+    /* every line below this is identical for evaluateLong & evaluateString */
+    final int n = inputCol.isRepeating ? 1 : batch.size;
+    int[] sel = batch.selected;
+
+    if(batch.size == 0) {
+      /* n != batch.size when isRepeating */
+      return;
+    }
+
+    /* true for all algebraic UDFs with no state */
+    outV.isRepeating = inputCol.isRepeating;
+
+    switch (inputTypes[0]) {
+      case DATE:
+        if (inputCol.noNulls) {
+          outV.noNulls = true;
+          if (batch.selectedInUse) {
+            for(int j=0; j < n; j++) {
+              int i = sel[j];
+              outV.vector[i] = evaluateDate(inputCol, i);
+              outV.start[i] = 0;
+              outV.length[i] = outV.vector[i].length;
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.vector[i] = evaluateDate(inputCol, i);
+              outV.start[i] = 0;
+              outV.length[i] = outV.vector[i].length;
+            }
+          }
+        } else {
+          // Handle case with nulls. Don't do function if the value is null, to save time,
+          // because calling the function can be expensive.
+          outV.noNulls = false;
+          if (batch.selectedInUse) {
+            for(int j = 0; j < n; j++) {
+              int i = sel[j];
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                outV.vector[i] = evaluateDate(inputCol, i);
+                outV.start[i] = 0;
+                outV.length[i] = outV.vector[i].length;
+              }
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                outV.vector[i] = evaluateDate(inputCol, i);
+                outV.start[i] = 0;
+                outV.length[i] = outV.vector[i].length;
+              }
+            }
+          }
+        }
+        break;
+
+      case TIMESTAMP:
+        if (inputCol.noNulls) {
+          outV.noNulls = true;
+          if (batch.selectedInUse) {
+            for(int j=0; j < n; j++) {
+              int i = sel[j];
+              outV.vector[i] = evaluateTimestamp(inputCol, i);
+              outV.start[i] = 0;
+              outV.length[i] = outV.vector[i].length;
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.vector[i] = evaluateTimestamp(inputCol, i);
+              outV.start[i] = 0;
+              outV.length[i] = outV.vector[i].length;
+            }
+          }
+        } else {
+          // Handle case with nulls. Don't do function if the value is null, to save time,
+          // because calling the function can be expensive.
+          outV.noNulls = false;
+          if (batch.selectedInUse) {
+            for(int j = 0; j < n; j++) {
+              int i = sel[j];
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outV.start[i] = 0;
+                outV.length[i] = outV.vector[i].length;
+              }
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                outV.vector[i] = evaluateTimestamp(inputCol, i);
+                outV.start[i] = 0;
+                outV.length[i] = outV.vector[i].length;
+              }
+            }
+          }
+        }
+        break;
+
+      case STRING:
+        if (inputCol.noNulls) {
+          outV.noNulls = true;
+          if (batch.selectedInUse) {
+            for(int j=0; j < n; j++) {
+              int i = sel[j];
+              evaluateString(inputCol, outV, i);
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              evaluateString(inputCol, outV, i);
+            }
+          }
+        } else {
+          // Handle case with nulls. Don't do function if the value is null, to save time,
+          // because calling the function can be expensive.
+          outV.noNulls = false;
+          if (batch.selectedInUse) {
+            for(int j = 0; j < n; j++) {
+              int i = sel[j];
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                evaluateString(inputCol, outV, i);
+              }
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                evaluateString(inputCol, outV, i);
+              }
+            }
+          }
+        }
+        break;
+    }
+  }
+
+  protected byte[] evaluateTimestamp(ColumnVector columnVector, int index) {
+    LongColumnVector lcv = (LongColumnVector) columnVector;
+    calendar.setTimeInMillis(lcv.vector[index] / 1000000);
+    if (isPositive) {
+      calendar.add(Calendar.DATE, numDays);
+    } else {
+      calendar.add(Calendar.DATE, -numDays);
+    }
+    Date newDate = calendar.getTime();
+    text.set(formatter.format(newDate));
+    return Arrays.copyOf(text.getBytes(), text.getLength());
+  }
+
+  protected byte[] evaluateDate(ColumnVector columnVector, int index) {
+    LongColumnVector lcv = (LongColumnVector) columnVector;
+    if (isPositive) {
+      calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] + numDays));
+    } else {
+      calendar.setTimeInMillis(DateWritable.daysToMillis((int) lcv.vector[index] - numDays));
+    }
+    Date newDate = calendar.getTime();
+    text.set(formatter.format(newDate));
+    return Arrays.copyOf(text.getBytes(), text.getLength());
+  }
+
+  protected void evaluateString(ColumnVector columnVector, BytesColumnVector outputVector, int i) {
+    BytesColumnVector bcv = (BytesColumnVector) columnVector;
+    text.set(bcv.vector[i], bcv.start[i], bcv.length[i]);
+    try {
+      calendar.setTime(formatter.parse(text.toString()));
+    } catch (ParseException e) {
+      // Unparseable input: leave this row null instead of reusing a stale calendar value.
+      outputVector.isNull[i] = true;
+      outputVector.noNulls = false;
+      return;
+    }
+    if (isPositive) {
+      calendar.add(Calendar.DATE, numDays);
+    } else {
+      calendar.add(Calendar.DATE, -numDays);
+    }
+    Date newDate = calendar.getTime();
+    text.set(formatter.format(newDate));
+
+    byte[] bytes = text.getBytes();
+    int size = text.getLength();
+    outputVector.vector[i] = Arrays.copyOf(bytes, size);
+    outputVector.start[i] = 0;
+    outputVector.length[i] = size;
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return this.outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "string";
+  }
+
+  public int getColNum() {
+    return colNum;
+  }
+
+  public void setColNum(int colNum) {
+    this.colNum = colNum;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public int getNumDays() {
+    return numDays;
+  }
+
+  public void setNumDays(int numDays) {
+    this.numDays = numDays;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.ANY,
+            VectorExpressionDescriptor.ArgumentType.LONG)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR);
+    return b.build();
+  }
+}
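Note: a minimal end-to-end harness for the class above, showing how the new setInputTypes plumbing disambiguates the long-backed input. This is an illustrative sketch, not a test from the patch; it assumes default-constructed column vectors are large enough for a one-row batch:

```java
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateAddColScalar;

public class DateAddHarness {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    LongColumnVector dates = new LongColumnVector();
    BytesColumnVector out = new BytesColumnVector();
    batch.cols[0] = dates;
    batch.cols[1] = out;
    batch.size = 1;

    dates.vector[0] = 16071; // 2014-01-01 as days since epoch

    VectorUDFDateAddColScalar expr = new VectorUDFDateAddColScalar(0, 5, 1);
    expr.setInputTypes(VectorExpression.Type.DATE); // the new plumbing from this patch
    expr.evaluate(batch);

    System.out.println(new String(out.vector[0], out.start[0], out.length[0])); // 2014-01-06
  }
}
```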
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
new file mode 100644
index 0000000..d1156b6
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.Text;
+
+import java.io.UnsupportedEncodingException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Date;
+
+public class VectorUDFDateAddScalarCol extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private int outputColumn;
+  private long longValue = 0;
+  private byte[] stringValue = null;
+  protected boolean isPositive = true;
+  private transient final Calendar calendar = Calendar.getInstance();
+  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient final Text text = new Text();
+  private transient Date baseDate = new Date();
+
+  public VectorUDFDateAddScalarCol() {
+    super();
+  }
+
+  public VectorUDFDateAddScalarCol(Object object, int colNum, int outputColumn) {
+    this();
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+
+    if (object instanceof Long) {
+      this.longValue = (Long) object;
+    } else if (object instanceof byte []) {
+      this.stringValue = (byte[]) object;
+    }
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    LongColumnVector inputCol = (LongColumnVector) batch.cols[this.colNum];
+    /* every line below this is identical for evaluateLong & evaluateString */
+    final int n = inputCol.isRepeating ? 1 : batch.size;
+    int[] sel = batch.selected;
+    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
+
+    switch (inputTypes[0]) {
+      case DATE:
+        baseDate.setTime(DateWritable.daysToMillis((int) longValue));
+        break;
+
+      case TIMESTAMP:
+        baseDate.setTime(longValue / 1000000);
+        break;
+
+      case STRING:
+        try {
+          baseDate = formatter.parse(new String(stringValue, "UTF-8"));
+          break;
+        } catch (Exception e) {
+          outV.noNulls = false;
+          if (batch.selectedInUse) {
+            for(int j=0; j < n; j++) {
+              int i = sel[j];
+              outV.isNull[i] = true;
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.isNull[i] = true;
+            }
+          }
+          return;
+        }
+    }
+
+    if(batch.size == 0) {
+      /* n != batch.size when isRepeating */
+      return;
+    }
+
+    /* true for all algebraic UDFs with no state */
+    outV.isRepeating = inputCol.isRepeating;
+
+    if (inputCol.noNulls) {
+      outV.noNulls = true;
+      if (batch.selectedInUse) {
+        for(int j=0; j < n; j++) {
+          int i = sel[j];
+          evaluate(baseDate, inputCol.vector[i], outV, i);
+        }
+      } else {
+        for(int i = 0; i < n; i++) {
+          evaluate(baseDate, inputCol.vector[i], outV, i);
+        }
+      }
+    } else {
+      // Handle case with nulls. Don't do function if the value is null, to save time,
+      // because calling the function can be expensive.
+      outV.noNulls = false;
+      if (batch.selectedInUse) {
+        for(int j = 0; j < n; j++) {
+          int i = sel[j];
+          outV.isNull[i] = inputCol.isNull[i];
+          if (!inputCol.isNull[i]) {
+            evaluate(baseDate, inputCol.vector[i], outV, i);
+          }
+        }
+      } else {
+        for(int i = 0; i < n; i++) {
+          outV.isNull[i] = inputCol.isNull[i];
+          if (!inputCol.isNull[i]) {
+            evaluate(baseDate, inputCol.vector[i], outV, i);
+          }
+        }
+      }
+    }
+  }
+
+  private void evaluate(Date baseDate, long numDays, BytesColumnVector output, int i) {
+    calendar.setTime(baseDate);
+
+    if (isPositive) {
+      calendar.add(Calendar.DATE, (int) numDays);
+    } else {
+      calendar.add(Calendar.DATE, -(int) numDays);
+    }
+    Date newDate = calendar.getTime();
+    text.set(formatter.format(newDate));
+    int size = text.getLength();
+    output.vector[i] = Arrays.copyOf(text.getBytes(), size);
+    output.start[i] = 0;
+    output.length[i] = size;
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return this.outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "string";
+  }
+
+  public int getColNum() {
+    return colNum;
+  }
+
+  public void setColNum(int colNum) {
+    this.colNum = colNum;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public long getLongValue() {
+    return longValue;
+  }
+
+  public void setLongValue(long longValue) {
+    this.longValue = longValue;
+  }
+
+  public byte[] getStringValue() {
+    return stringValue;
+  }
+
+  public void setStringValue(byte[] stringValue) {
+    this.stringValue = stringValue;
+  }
+
+  public boolean isPositive() {
+    return isPositive;
+  }
+
+  public void setPositive(boolean isPositive) {
+    this.isPositive = isPositive;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.ANY,
+            VectorExpressionDescriptor.ArgumentType.LONG)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.SCALAR,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}
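Note: the isPositive flag is protected, which suggests the date_sub variants are meant to reuse this machinery by flipping it. A hypothetical subclass sketch (the class name is illustrative, not part of this patch):

```java
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateAddScalarCol;

// Hypothetical: date_sub(scalar, col) as date_add with the sign flipped.
public class VectorUDFDateSubScalarCol extends VectorUDFDateAddScalarCol {
  private static final long serialVersionUID = 1L;

  public VectorUDFDateSubScalarCol(Object object, int colNum, int outputColumn) {
    super(object, colNum, outputColumn);
    this.isPositive = false; // subtract numDays instead of adding
  }

  public VectorUDFDateSubScalarCol() {
    super();
  }
}
```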
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
new file mode 100644
index 0000000..7af1b75
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java
@@ -0,0 +1,226 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+
+import java.sql.Date;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+
+public class VectorUDFDateDiffColCol extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient Date date = new Date(0);
+  private transient LongColumnVector dateVector1 = new LongColumnVector();
+  private transient LongColumnVector dateVector2 = new LongColumnVector();
+
+  public VectorUDFDateDiffColCol(int colNum1, int colNum2, int outputColumn) {
+    this();
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  public VectorUDFDateDiffColCol() {
+    super();
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    ColumnVector inputColVector1 = batch.cols[colNum1];
+    ColumnVector inputColVector2 = batch.cols[colNum2];
+    int[] sel = batch.selected;
+    int n = batch.size;
+
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    long[] outputVector = outV.vector;
+    if (n <= 0) {
+      // Nothing to do
+      return;
+    }
+
+    NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse);
+
+    LongColumnVector convertedVector1 = toDateArray1(inputTypes[0], inputColVector1, batch.size);
+    LongColumnVector convertedVector2 = toDateArray2(inputTypes[1], inputColVector2, batch.size);
+
+    // Now disregard null in second pass.
+    if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) {
+      // All must be selected otherwise size would be zero
+      // Repeating property will not change.
+      outV.isRepeating = true;
+      if (convertedVector1.isNull[0] || convertedVector2.isNull[0]) {
+        outV.isNull[0] = true;
+      } else {
+        outputVector[0] = convertedVector1.vector[0] - convertedVector2.vector[0];
+      }
+    } else if (batch.selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (convertedVector1.isNull[i] || convertedVector2.isNull[i]) {
+          outV.isNull[i] = true;
+        } else {
+          outputVector[i] = convertedVector1.vector[i] - convertedVector2.vector[i];
+        }
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        if (convertedVector1.isNull[i] || convertedVector2.isNull[i]) {
+          outV.isNull[i] = true;
+        } else {
+          outputVector[i] = convertedVector1.vector[i] - convertedVector2.vector[i];
+        }
+      }
+    }
+  }
+
+  private LongColumnVector toDateArray1(Type colType, ColumnVector inputColVector, int size) {
+    switch (colType) {
+      case DATE:
+        return (LongColumnVector) inputColVector;
+
+      case TIMESTAMP:
+        if (size > dateVector1.vector.length) {
+          dateVector1 = new LongColumnVector(size * 2);
+        }
+
+        LongColumnVector lcv = (LongColumnVector) inputColVector;
+        for (int i = 0; i < size; i++) {
+          date.setTime(lcv.vector[i] / 1000000);
+          dateVector1.vector[i] = DateWritable.dateToDays(date);
+          // Carry the input null flag; the scratch vector is reused across batches.
+          dateVector1.isNull[i] = lcv.isNull[i];
+        }
+        return dateVector1;
+
+      case STRING:
+        if (size > dateVector1.vector.length) {
+          dateVector1 = new LongColumnVector(size * 2);
+        }
+
+        BytesColumnVector bcv = (BytesColumnVector) inputColVector;
+        for (int i = 0; i < size; i++) {
+          if (!bcv.isNull[i]) {
+            String string = new String(bcv.vector[i], bcv.start[i], bcv.length[i]);
+            try {
+              date.setTime(formatter.parse(string).getTime());
+              dateVector1.vector[i] = DateWritable.dateToDays(date);
+              // Clear any stale null flag left over from a previous batch.
+              dateVector1.isNull[i] = false;
+            } catch (ParseException e) {
+              dateVector1.isNull[i] = true;
+            }
+          }
+        }
+        return dateVector1;
+    }
+
+    return null;
+  }
+
+  private LongColumnVector toDateArray2(Type colType, ColumnVector inputColVector, int size) {
+    switch (colType) {
+      case DATE:
+        return (LongColumnVector) inputColVector;
+
+      case TIMESTAMP:
+        if (size > dateVector2.vector.length) {
+          dateVector2 = new LongColumnVector(size * 2);
+        }
+
+        LongColumnVector lcv = (LongColumnVector) inputColVector;
+        for (int i = 0; i < size; i++) {
+          date.setTime(lcv.vector[i] / 1000000);
+          dateVector2.vector[i] = DateWritable.dateToDays(date);
+          // Carry the input null flag; the scratch vector is reused across batches.
+          dateVector2.isNull[i] = lcv.isNull[i];
+        }
+        return dateVector2;
+
+      case STRING:
+        if (size > dateVector2.vector.length) {
+          dateVector2 = new LongColumnVector(size * 2);
+        }
+
+        BytesColumnVector bcv = (BytesColumnVector) inputColVector;
+        for (int i = 0; i < size; i++) {
+          if (!bcv.isNull[i]) {
+            String string = new String(bcv.vector[i], bcv.start[i], bcv.length[i]);
+            try {
+              date.setTime(formatter.parse(string).getTime());
+              dateVector2.vector[i] = DateWritable.dateToDays(date);
+              // Clear any stale null flag left over from a previous batch.
+              dateVector2.isNull[i] = false;
+            } catch (ParseException e) {
+              dateVector2.isNull[i] = true;
+            }
+          }
+        }
+        return dateVector2;
+    }
+
+    return null;
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return this.outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  public int getColNum1() {
+    return colNum1;
+  }
+
+  public void setColNum1(int colNum1) {
+    this.colNum1 = colNum1;
+  }
+
+  public int getColNum2() {
+    return colNum2;
+  }
+
+  public void setColNum2(int colNum2) {
+    this.colNum2 = colNum2;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.ANY,
+            VectorExpressionDescriptor.ArgumentType.ANY)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
    return b.build();
+  }
+}
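Note: all the datediff variants reduce both operands to days-since-epoch and subtract. The core arithmetic in isolation, as a sketch outside the patch:

```java
import java.sql.Date;

import org.apache.hadoop.hive.serde2.io.DateWritable;

public class DateDiffSketch {
  public static void main(String[] args) {
    // datediff reduces both sides to days-since-epoch, then subtracts.
    int d1 = DateWritable.dateToDays(Date.valueOf("2014-01-06"));
    int d2 = DateWritable.dateToDays(Date.valueOf("2014-01-01"));
    System.out.println(d1 - d2); // 5, matching datediff('2014-01-06', '2014-01-01')
  }
}
```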
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
new file mode 100644
index 0000000..05b71ac
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java
@@ -0,0 +1,292 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.io.Text;
+
+import java.sql.Date;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+
+public class VectorUDFDateDiffColScalar extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private int outputColumn;
+  private long longValue;
+  private byte[] stringValue;
+  private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private transient final Text text = new Text();
+  private int baseDate;
+  private transient Date date = new Date(0);
+
+  public VectorUDFDateDiffColScalar(int colNum, Object object, int outputColumn) {
+    super();
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+
+    if (object instanceof Long) {
+      this.longValue = (Long) object;
+    } else if (object instanceof byte []) {
+      this.stringValue = (byte []) object;
+    }
+  }
+
+  public VectorUDFDateDiffColScalar() {
+    super();
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    ColumnVector inputCol = batch.cols[this.colNum];
+    /* every line below this is identical for evaluateLong & evaluateString */
+    final int n = inputCol.isRepeating ? 1 : batch.size;
+    int[] sel = batch.selected;
+
+    if(batch.size == 0) {
+      /* n != batch.size when isRepeating */
+      return;
+    }
+
+    /* true for all algebraic UDFs with no state */
+    outV.isRepeating = inputCol.isRepeating;
+
+    switch (inputTypes[1]) {
+      case DATE:
+        baseDate = (int) longValue;
+        break;
+
+      case TIMESTAMP:
+        date.setTime(longValue / 1000000);
+        baseDate = DateWritable.dateToDays(date);
+        break;
+
+      case STRING:
+        try {
+          date.setTime(formatter.parse(new String(stringValue, "UTF-8")).getTime());
+          baseDate = DateWritable.dateToDays(date);
+          break;
+        } catch (Exception e) {
+          outV.noNulls = false;
+          if (batch.selectedInUse) {
+            for(int j=0; j < n; j++) {
+              int i = sel[j];
+              outV.isNull[i] = true;
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.isNull[i] = true;
+            }
+          }
+          return;
+        }
+    }
+
+    switch (inputTypes[0]) {
+      case DATE:
+        if (inputCol.noNulls) {
+          outV.noNulls = true;
+          if (batch.selectedInUse) {
+            for(int j=0; j < n; j++) {
+              int i = sel[j];
+              outV.vector[i] = evaluateDate(inputCol, i);
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.vector[i] = evaluateDate(inputCol, i);
+            }
+          }
+        } else {
+          // Handle case with nulls. Don't do function if the value is null, to save time,
+          // because calling the function can be expensive.
+          outV.noNulls = false;
+          if (batch.selectedInUse) {
+            for(int j = 0; j < n; j++) {
+              int i = sel[j];
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                outV.vector[i] = evaluateDate(inputCol, i);
+              }
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                outV.vector[i] = evaluateDate(inputCol, i);
+              }
+            }
+          }
+        }
+        break;
+
+      case TIMESTAMP:
+        if (inputCol.noNulls) {
+          outV.noNulls = true;
+          if (batch.selectedInUse) {
+            for(int j=0; j < n; j++) {
+              int i = sel[j];
+              outV.vector[i] = evaluateTimestamp(inputCol, i);
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.vector[i] = evaluateTimestamp(inputCol, i);
+            }
+          }
+        } else {
+          // Handle case with nulls. Don't do function if the value is null, to save time,
+          // because calling the function can be expensive.
+          outV.noNulls = false;
+          if (batch.selectedInUse) {
+            for(int j = 0; j < n; j++) {
+              int i = sel[j];
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                outV.vector[i] = evaluateTimestamp(inputCol, i);
+              }
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                outV.vector[i] = evaluateTimestamp(inputCol, i);
+              }
+            }
+          }
+        }
+        break;
+
+      case STRING:
+        if (inputCol.noNulls) {
+          outV.noNulls = true;
+          if (batch.selectedInUse) {
+            for(int j=0; j < n; j++) {
+              int i = sel[j];
+              evaluateString(inputCol, outV, i);
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              evaluateString(inputCol, outV, i);
+            }
+          }
+        } else {
+          // Handle case with nulls. Don't do function if the value is null, to save time,
+          // because calling the function can be expensive.
+          outV.noNulls = false;
+          if (batch.selectedInUse) {
+            for(int j = 0; j < n; j++) {
+              int i = sel[j];
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                evaluateString(inputCol, outV, i);
+              }
+            }
+          } else {
+            for(int i = 0; i < n; i++) {
+              outV.isNull[i] = inputCol.isNull[i];
+              if (!inputCol.isNull[i]) {
+                evaluateString(inputCol, outV, i);
+              }
+            }
+          }
+        }
+        break;
+    }
+  }
+
+  protected int evaluateTimestamp(ColumnVector columnVector, int index) {
+    LongColumnVector lcv = (LongColumnVector) columnVector;
+    date.setTime(lcv.vector[index] / 1000000);
+    return DateWritable.dateToDays(date) - baseDate;
+  }
+
+  protected int evaluateDate(ColumnVector columnVector, int index) {
+    LongColumnVector lcv = (LongColumnVector) columnVector;
+    return ((int) lcv.vector[index]) - baseDate;
+  }
+
+  protected void evaluateString(ColumnVector columnVector, LongColumnVector output, int i) {
+    BytesColumnVector bcv = (BytesColumnVector) columnVector;
+    text.set(bcv.vector[i], bcv.start[i], bcv.length[i]);
+    try {
+      date.setTime(formatter.parse(text.toString()).getTime());
+      output.vector[i] = DateWritable.dateToDays(date) - baseDate;
+    } catch (ParseException e) {
+      output.vector[i] = 1;
+      output.isNull[i] = true;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return this.outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  public int getColNum() {
+    return colNum;
+  }
+
+  public void setColNum(int colNum) {
+    this.colNum = colNum;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public long getLongValue() {
+    return longValue;
+  }
+
+  public void setLongValue(long longValue) {
+    this.longValue = longValue;
+  }
+
+  public byte[] getStringValue() {
+    return stringValue;
+  }
+
+  public void setStringValue(byte[] stringValue) {
+    this.stringValue = stringValue;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.ANY,
+            VectorExpressionDescriptor.ArgumentType.ANY)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR);
+    return b.build();
+  }
+}
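Note: mind the sign conventions. VectorUDFDateDiffColScalar above computes column minus scalar, while VectorUDFDateDiffScalarCol below computes scalar minus column, matching datediff's argument order in each case. A one-row harness for the col/scalar form, illustrative only:

```java
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateDiffColScalar;

public class DateDiffColScalarHarness {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    LongColumnVector col = new LongColumnVector();
    LongColumnVector out = new LongColumnVector();
    batch.cols[0] = col;
    batch.cols[1] = out;
    batch.size = 1;

    col.vector[0] = 16076;               // 2014-01-06
    Object scalar = Long.valueOf(16071); // 2014-01-01 as a long scalar

    VectorUDFDateDiffColScalar expr = new VectorUDFDateDiffColScalar(0, scalar, 1);
    // Column is argument 0, scalar is argument 1; both are dates here.
    expr.setInputTypes(VectorExpression.Type.DATE, VectorExpression.Type.DATE);
    expr.evaluate(batch);

    System.out.println(out.vector[0]); // 5: datediff(col, scalar) = col - scalar in days
  }
}
```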
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.io.Text; + +import java.sql.Date; +import java.text.ParseException; +import java.text.SimpleDateFormat; + +public class VectorUDFDateDiffScalarCol extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + private long longValue; + private byte[] stringValue; + private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private transient final Text text = new Text(); + private int baseDate; + private transient Date date = new Date(0); + + public VectorUDFDateDiffScalarCol(Object object, int colNum, int outputColumn) { + super(); + this.colNum = colNum; + this.outputColumn = outputColumn; + + if (object instanceof Long) { + this.longValue = (Long) object; + } else if (object instanceof byte []) { + this.stringValue = (byte[]) object; + } + } + + public VectorUDFDateDiffScalarCol() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + ColumnVector inputCol = batch.cols[this.colNum]; + /* every line below this is identical for evaluateLong & evaluateString */ + final int n = inputCol.isRepeating ? 1 : batch.size; + int[] sel = batch.selected; + + if(batch.size == 0) { + /* n != batch.size when isRepeating */ + return; + } + + /* true for all algebraic UDFs with no state */ + outV.isRepeating = inputCol.isRepeating; + + switch (inputTypes[0]) { + case DATE: + baseDate = (int) longValue; + break; + + case TIMESTAMP: + date.setTime(longValue / 1000000); + baseDate = DateWritable.dateToDays(date); + break; + + case STRING: + try { + date.setTime(formatter.parse(new String(stringValue, "UTF-8")).getTime()); + baseDate = DateWritable.dateToDays(date); + break; + } catch (Exception e) { + outV.noNulls = false; + if (batch.selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = true; + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = true; + } + } + return; + } + } + + switch (inputTypes[1]) { + case DATE: + if (inputCol.noNulls) { + outV.noNulls = true; + if (batch.selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.vector[i] = evaluateDate(inputCol, i); + } + } else { + for(int i = 0; i < n; i++) { + outV.vector[i] = evaluateDate(inputCol, i); + } + } + } else { + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. 
+ outV.noNulls = false; + if (batch.selectedInUse) { + for(int j = 0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = evaluateDate(inputCol, i); + } + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = evaluateDate(inputCol, i); + } + } + } + } + break; + + case TIMESTAMP: + if (inputCol.noNulls) { + outV.noNulls = true; + if (batch.selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.vector[i] = evaluateTimestamp(inputCol, i); + } + } else { + for(int i = 0; i < n; i++) { + outV.vector[i] = evaluateTimestamp(inputCol, i); + } + } + } else { + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. + outV.noNulls = false; + if (batch.selectedInUse) { + for(int j = 0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = evaluateTimestamp(inputCol, i); + } + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = evaluateTimestamp(inputCol, i); + } + } + } + } + break; + + case STRING: + if (inputCol.noNulls) { + outV.noNulls = true; + if (batch.selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + evaluateString(inputCol, outV, i); + } + } else { + for(int i = 0; i < n; i++) { + evaluateString(inputCol, outV, i); + } + } + } else { + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. + outV.noNulls = false; + if (batch.selectedInUse) { + for(int j = 0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + evaluateString(inputCol, outV, i); + } + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + evaluateString(inputCol, outV, i); + } + } + } + } + break; + } + } + + protected int evaluateTimestamp(ColumnVector columnVector, int index) { + LongColumnVector lcv = (LongColumnVector) columnVector; + date.setTime(lcv.vector[index] / 1000000); + return baseDate - DateWritable.dateToDays(date); + } + + protected int evaluateDate(ColumnVector columnVector, int index) { + LongColumnVector lcv = (LongColumnVector) columnVector; + return baseDate - ((int) lcv.vector[index]); + } + + protected void evaluateString(ColumnVector columnVector, LongColumnVector output, int i) { + BytesColumnVector bcv = (BytesColumnVector) columnVector; + text.set(bcv.vector[i], bcv.start[i], bcv.length[i]); + try { + date.setTime(formatter.parse(text.toString()).getTime()); + output.vector[i] = baseDate - DateWritable.dateToDays(date); + } catch (ParseException e) { + output.vector[i] = 1; + output.noNulls = false; + output.isNull[i] = true; + } + } + @Override + public int getOutputColumn() { + return this.outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public long getLongValue() { + return longValue; + } + + public void setLongValue(long longValue) { + this.longValue = longValue; + } + + public byte[] getStringValue() { + return stringValue; + } + + public void setStringValue(byte[] stringValue) { + this.stringValue = stringValue; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.ANY, + VectorExpressionDescriptor.ArgumentType.ANY) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java new file mode 100644 index 0000000..2ebd82e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +import java.io.UnsupportedEncodingException; +import java.sql.Date; +import java.text.SimpleDateFormat; + +/** + * Vectorized version of the date() UDF for long-encoded (timestamp or date) input; + * emits the date formatted as yyyy-MM-dd. + */ +public class VectorUDFDateLong extends LongToStringUnaryUDF { + private static final long serialVersionUID = 1L; + + private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private transient Date date = new Date(0); + + public VectorUDFDateLong() { + super(); + } + + public VectorUDFDateLong(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void func(BytesColumnVector outV, long[] vector, int i) { + switch (inputTypes[0]) { + case DATE: + date.setTime(DateWritable.daysToMillis((int) vector[i])); + break; + + case TIMESTAMP: + date.setTime(vector[i] / 1000000); + break; + } + try { + byte[] bytes = formatter.format(date).getBytes("UTF-8"); + outV.setRef(i, bytes, 0, bytes.length); + } catch (UnsupportedEncodingException e) { + outV.vector[i] = null; + outV.noNulls = false; + outV.isNull[i] = true; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java new file mode 100644 index 0000000..dd84de3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.io.Text; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; + +/** + * Vectorized version of the date(string) UDF: parses a yyyy-MM-dd string and + * returns it re-formatted, or null if the input cannot be parsed. + */ +public class VectorUDFDateString extends StringUnaryUDF { + private static final long serialVersionUID = 1L; + + public VectorUDFDateString(int colNum, int outputColumn) { + super(colNum, outputColumn, new StringUnaryUDF.IUDFUnaryString() { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + Text t = new Text(); + + @Override + public Text evaluate(Text s) { + if (s == null) { + return null; + } + try { + Date date = formatter.parse(s.toString()); + t.set(formatter.format(date)); + return t; + } catch (ParseException e) { + return null; + } + } + }); + } + + public VectorUDFDateString() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColCol.java new file mode 100644 index 0000000..994d416 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColCol.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Returns the date that is num_days before start_date. + */ +public class VectorUDFDateSubColCol extends VectorUDFDateAddColCol { + public VectorUDFDateSubColCol(int colNum1, int colNum2, int outputColumn) { + super(colNum1, colNum2, outputColumn); + isPositive = false; + } + + public VectorUDFDateSubColCol() { + super(); + isPositive = false; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColScalar.java new file mode 100644 index 0000000..e952f5f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubColScalar.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Returns the date that is num_days before start_date. + */ +public class VectorUDFDateSubColScalar extends VectorUDFDateAddColScalar { + public VectorUDFDateSubColScalar(int colNum, long numDays, int outputColumn) { + super(colNum, numDays, outputColumn); + isPositive = false; + } + + public VectorUDFDateSubColScalar() { + super(); + isPositive = false; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubScalarCol.java new file mode 100644 index 0000000..eccbb21 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateSubScalarCol.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Returns the date that is num_days before start_date. + */ +public class VectorUDFDateSubScalarCol extends VectorUDFDateAddScalarCol { + public VectorUDFDateSubScalarCol(Object object, int colNum, int outputColumn) { + super(object, colNum, outputColumn); + isPositive = false; + } + + public VectorUDFDateSubScalarCol() { + super(); + isPositive = false; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java index 21552e1..58724a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java @@ -37,9 +37,14 @@ public VectorUDFMonthLong() { } @Override - protected long getField(long time) { + protected long getTimestampField(long time) { /* january is 0 */ - return 1 + super.getField(time); + return 1 + super.getTimestampField(time); } + @Override + protected long getDateField(long days) { + /* january is 0 */ + return 1 + super.getDateField(days); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java index f840f09..9c1b827 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java @@ -24,9 +24,10 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.serde2.io.DateWritable; /** - * Abstract class to return various fields from a Timestamp. + * Abstract class to return various fields from a Timestamp or Date. */ public abstract class VectorUDFTimestampFieldLong extends VectorExpression { @@ -75,11 +76,16 @@ protected final Timestamp getTimestamp(long nanos) { return ts; } - protected long getField(long time) { + protected long getTimestampField(long time) { calendar.setTime(getTimestamp(time)); return calendar.get(field); } + protected long getDateField(long days) { + calendar.setTimeInMillis(DateWritable.daysToMillis((int) days)); + return calendar.get(field); + } + @Override public void evaluate(VectorizedRowBatch batch) { LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; @@ -96,38 +102,78 @@ public void evaluate(VectorizedRowBatch batch) { /* true for all algebraic UDFs with no state */ outV.isRepeating = inputCol.isRepeating; - if (inputCol.noNulls) { - outV.noNulls = true; - if (batch.selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.vector[i] = getField(inputCol.vector[i]); - } - } else { - for(int i = 0; i < n; i++) { - outV.vector[i] = getField(inputCol.vector[i]); - } - } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive.
- outV.noNulls = false; - if (batch.selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getField(inputCol.vector[i]); + switch (inputTypes[0]) { + case TIMESTAMP: + if (inputCol.noNulls) { + outV.noNulls = true; + if (batch.selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.vector[i] = getTimestampField(inputCol.vector[i]); + } + } else { + for(int i = 0; i < n; i++) { + outV.vector[i] = getTimestampField(inputCol.vector[i]); + } + } + } else { + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. + outV.noNulls = false; + if (batch.selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = getTimestampField(inputCol.vector[i]); + } + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = getTimestampField(inputCol.vector[i]); + } + } } } - } else { - for(int i = 0; i < n; i++) { - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getField(inputCol.vector[i]); + break; + + case DATE: + if (inputCol.noNulls) { + outV.noNulls = true; + if (batch.selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.vector[i] = getDateField(inputCol.vector[i]); + } + } else { + for(int i = 0; i < n; i++) { + outV.vector[i] = getDateField(inputCol.vector[i]); + } + } + } else { + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. + outV.noNulls = false; + if (batch.selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = getDateField(inputCol.vector[i]); + } + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + outV.vector[i] = getDateField(inputCol.vector[i]); + } + } } } - } + break; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java index 0feed95..6df68f0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.serde2.io.DateWritable; + /** * Return Unix Timestamp. 
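* For TIMESTAMP inputs the long value is nanoseconds since the epoch; for DATE inputs it is days since the epoch.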
* Extends {@link VectorUDFTimestampFieldLong} @@ -27,14 +29,20 @@ private static final long serialVersionUID = 1L; @Override - protected long getField(long time) { + protected long getTimestampField(long time) { long ms = (time / (1000*1000*1000)) * 1000; long remainder = time % (1000*1000*1000); /* negative timestamps need to be adjusted */ if(remainder < 0) { ms -= 1000; } - return ms/1000; + return ms / 1000; + } + + @Override + protected long getDateField(long days) { + long ms = DateWritable.daysToMillis((int) days); + return ms / 1000; } public VectorUDFUnixTimeStampLong(int colNum, int outputColumn) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java index abd87fe..bb79cfd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java @@ -45,7 +45,7 @@ } @Override - protected long getField(long time) { + protected long getTimestampField(long time) { /* binarySearch is faster than a loop doing a[i] (no array out of bounds checks) */ int year = Arrays.binarySearch(YEAR_BOUNDARIES, time); if(year >= 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index 3bc9493..3f0b226 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.exec.vector.udf; +import java.sql.Date; import java.sql.Timestamp; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -27,9 +28,11 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; @@ -280,6 +283,16 @@ private void setOutputCol(ColumnVector colVec, int i, Object value) { // The % 1000000 operation removes the ms values // so that the milliseconds are not counted twice. 
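// (l holds the timestamp as nanoseconds since the epoch, the long encoding used for vectorized timestamps.)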
lv.vector[i] = l; + } else if (outputOI instanceof WritableDateObjectInspector) { + LongColumnVector lv = (LongColumnVector) colVec; + Date ts; + if (value instanceof Date) { + ts = (Date) value; + } else { + ts = ((WritableDateObjectInspector) outputOI).getPrimitiveJavaObject(value); + } + long l = DateWritable.dateToDays(ts); + lv.vector[i] = l; } else if (outputOI instanceof WritableBooleanObjectInspector) { LongColumnVector lv = (LongColumnVector) colVec; if (value instanceof Boolean) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 7798a7c..d732c01 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -2435,6 +2435,7 @@ static int getIndexPosition(OrcProto.ColumnEncoding.Kind encoding, case LONG: case FLOAT: case DOUBLE: + case DATE: case STRUCT: case MAP: case LIST: diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e6be03f..1cd67b5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -190,6 +190,11 @@ public Vectorizer() { supportedGenericUDFs.add(UDFWeekOfYear.class); supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class); + supportedGenericUDFs.add(GenericUDFDateAdd.class); + supportedGenericUDFs.add(GenericUDFDateSub.class); + supportedGenericUDFs.add(GenericUDFDate.class); + supportedGenericUDFs.add(GenericUDFDateDiff.class); + supportedGenericUDFs.add(UDFLike.class); supportedGenericUDFs.add(UDFRegExp.class); supportedGenericUDFs.add(UDFSubstr.class); @@ -243,6 +248,7 @@ public Vectorizer() { supportedGenericUDFs.add(UDFToString.class); supportedGenericUDFs.add(GenericUDFTimestamp.class); supportedGenericUDFs.add(GenericUDFToDecimal.class); + supportedGenericUDFs.add(GenericUDFToDate.class); // For conditional expressions supportedGenericUDFs.add(GenericUDFIf.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java index 53b0bdb..c31174a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java @@ -25,6 +25,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateString; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -46,6 +49,7 @@ extended = "Example:\n " + " > SELECT _FUNC_('2009-07-30 04:17:52') FROM src LIMIT 1;\n" + " '2009-07-30'") +@VectorizedExpressions({VectorUDFDateString.class, VectorUDFDateLong.class}) public class GenericUDFDate extends GenericUDF { private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient TimestampConverter timestampConverter; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java index a3fe770..1b6c64d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java @@ -27,6 +27,10 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateAddColCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateAddColScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateAddScalarCol; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -59,6 +63,7 @@ + "Example:\n " + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " '2009-31-07'") +@VectorizedExpressions({VectorUDFDateAddColScalar.class, VectorUDFDateAddScalarCol.class, VectorUDFDateAddColCol.class}) public class GenericUDFDateAdd extends GenericUDF { private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient TimestampConverter timestampConverter; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java index ad75938..92b1000 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateDiff.java @@ -20,7 +20,6 @@ import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.Calendar; import java.util.Date; import java.util.TimeZone; @@ -28,6 +27,10 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateDiffColCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateDiffColScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateDiffScalarCol; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -38,7 +41,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.IntWritable; /** @@ -57,6 +59,7 @@ + "Example:\n " + " > SELECT _FUNC_('2009-30-07', '2009-31-07') FROM src LIMIT 1;\n" + " 1") +@VectorizedExpressions({VectorUDFDateDiffColScalar.class, VectorUDFDateDiffColCol.class, VectorUDFDateDiffScalarCol.class}) public class GenericUDFDateDiff extends GenericUDF { private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient Converter inputConverter1; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java index ec72e87..125e467 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateSub.java @@ -27,6 +27,10 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubScalarCol; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -59,6 +63,7 @@ + "Example:\n " + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " '2009-29-07'") +@VectorizedExpressions({VectorUDFDateSubColScalar.class, VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class}) public class GenericUDFDateSub extends GenericUDF { private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient TimestampConverter timestampConverter; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java index 2689fca..0613005 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java @@ -20,6 +20,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToDate; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDate; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -38,6 +41,7 @@ + "Example:\n " + " > SELECT CAST('2009-01-01' AS DATE) FROM src LIMIT 1;\n" + " '2009-01-01'") +@VectorizedExpressions({CastStringToDate.class, CastLongToDate.class}) public class GenericUDFToDate extends GenericUDF { private transient PrimitiveObjectInspector argumentOI; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java new file mode 100644 index 0000000..6bd4be1 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java @@ -0,0 +1,423 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import junit.framework.Assert; +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; +import org.apache.hadoop.hive.ql.udf.UDFMonth; +import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; +import org.apache.hadoop.hive.ql.udf.UDFYear; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Test; + +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; +import java.util.Random; + +public class TestVectorDateExpressions { + /* copied over from VectorUDFTimestampFieldLong */ + private TimestampWritable toTimestampWritable(long daysSinceEpoch) { + Timestamp ts = new Timestamp(DateWritable.daysToMillis((int) daysSinceEpoch)); + return new TimestampWritable(ts); + } + + private int[] getAllBoundaries() { + List boundaries = new ArrayList(1); + Calendar c = Calendar.getInstance(); + c.setTimeInMillis(0); // c.set doesn't reset millis + for (int year = 1902; year <= 2038; year++) { + c.set(year, Calendar.JANUARY, 1, 0, 0, 0); + int exactly = (int) (c.getTimeInMillis() / (24 * 60 * 60 * 1000)); + int before = exactly - 1; + int after = exactly + 1; + boundaries.add(Integer.valueOf(before)); + boundaries.add(Integer.valueOf(exactly)); + boundaries.add(Integer.valueOf(after)); + } + Integer[] indices = boundaries.toArray(new Integer[1]); + return ArrayUtils.toPrimitive(indices); + } + + private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size) { + VectorizedRowBatch batch = new VectorizedRowBatch(2, size); + LongColumnVector lcv = new LongColumnVector(size); + Random rand = new Random(seed); + for (int i = 0; i < size; i++) { + lcv.vector[i] = (rand.nextInt()); + } + batch.cols[0] = lcv; + batch.cols[1] = new LongColumnVector(size); + batch.size = size; + return batch; + } + + /* + * Input array is used to fill the entire size of the vector row batch + */ + private VectorizedRowBatch getVectorizedRowBatch(int[] inputs, int size) { + VectorizedRowBatch batch = new VectorizedRowBatch(2, size); + LongColumnVector lcv = new LongColumnVector(size); + for (int i = 0; i < size; i++) { + lcv.vector[i] = inputs[i % inputs.length]; + } + batch.cols[0] = lcv; + batch.cols[1] = new LongColumnVector(size); + batch.size = size; + return batch; + } + + private void compareToUDFYearDate(long t, int y) { + UDFYear udf = new UDFYear(); + TimestampWritable tsw = toTimestampWritable(t); + IntWritable res = udf.evaluate(tsw); + Assert.assertEquals(res.get(), y); + } + + private void verifyUDFYear(VectorizedRowBatch batch) { + VectorExpression udf = null; + udf = new VectorUDFYearLong(0, 1); + udf.setInputTypes(VectorExpression.Type.DATE); + udf.evaluate(batch); + final int 
in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + if (!batch.cols[in].noNulls) { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + long t = ((LongColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) batch.cols[out]).vector[i]; + compareToUDFYearDate(t, (int) y); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFYear() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFYear(batch); + + int[] boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFYear(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFYear(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFYear(batch); + + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFYear(batch); + } + + private void compareToUDFDayOfMonthDate(long t, int y) { + UDFDayOfMonth udf = new UDFDayOfMonth(); + TimestampWritable tsw = toTimestampWritable(t); + IntWritable res = udf.evaluate(tsw); + Assert.assertEquals(res.get(), y); + } + + private void verifyUDFDayOfMonth(VectorizedRowBatch batch) { + VectorExpression udf = null; + udf = new VectorUDFDayOfMonthLong(0, 1); + udf.setInputTypes(VectorExpression.Type.DATE); + udf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + if (!batch.cols[in].noNulls) { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + long t = ((LongColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) batch.cols[out]).vector[i]; + compareToUDFDayOfMonthDate(t, (int) y); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFDayOfMonth() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFDayOfMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFDayOfMonth(batch); + + int[] boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFDayOfMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFDayOfMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFDayOfMonth(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFDayOfMonth(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFDayOfMonth(batch); + + batch = 
getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFDayOfMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFDayOfMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFDayOfMonth(batch); + } + + private void compareToUDFMonthDate(long t, int y) { + UDFMonth udf = new UDFMonth(); + TimestampWritable tsw = toTimestampWritable(t); + IntWritable res = udf.evaluate(tsw); + Assert.assertEquals(res.get(), y); + } + + private void verifyUDFMonth(VectorizedRowBatch batch) { + VectorExpression udf; + udf = new VectorUDFMonthLong(0, 1); + udf.setInputTypes(VectorExpression.Type.DATE); + udf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + if (!batch.cols[in].noNulls) { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + long t = ((LongColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) batch.cols[out]).vector[i]; + compareToUDFMonthDate(t, (int) y); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFMonth() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFMonth(batch); + + int[] boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFMonth(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFMonth(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFMonth(batch); + + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFMonth(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFMonth(batch); + } + + private LongWritable getLongWritable(TimestampWritable i) { + LongWritable result = new LongWritable(); + if (i == null) { + return null; + } else { + result.set(i.getSeconds()); + return result; + } + } + + private void compareToUDFUnixTimeStampDate(long t, long y) { + TimestampWritable tsw = toTimestampWritable(t); + LongWritable res = getLongWritable(tsw); + if(res.get() != y) { + System.out.printf("%d vs %d for %d, %d\n", res.get(), y, t, + tsw.getTimestamp().getTime()/1000); + } + + Assert.assertEquals(res.get(), y); + } + + private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch) { + VectorExpression udf; + udf = new VectorUDFUnixTimeStampLong(0, 1); + udf.setInputTypes(VectorExpression.Type.DATE); + udf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + if (!batch.cols[out].noNulls) { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + long t = ((LongColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) batch.cols[out]).vector[i]; + compareToUDFUnixTimeStampDate(t, y); + } else { + 
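+ // a null input row must be mirrored by a null entry in the output mask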
Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFUnixTimeStamp() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFUnixTimeStamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFUnixTimeStamp(batch); + + int[] boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFUnixTimeStamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFUnixTimeStamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFUnixTimeStamp(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFUnixTimeStamp(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFUnixTimeStamp(batch); + + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFUnixTimeStamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFUnixTimeStamp(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFUnixTimeStamp(batch); + } + + private void compareToUDFWeekOfYearDate(long t, int y) { + UDFWeekOfYear udf = new UDFWeekOfYear(); + TimestampWritable tsw = toTimestampWritable(t); + IntWritable res = udf.evaluate(tsw); + Assert.assertEquals(res.get(), y); + } + + private void verifyUDFWeekOfYear(VectorizedRowBatch batch) { + VectorExpression udf; + udf = new VectorUDFWeekOfYearLong(0, 1); + udf.setInputTypes(VectorExpression.Type.DATE); + udf.evaluate(batch); + final int in = 0; + final int out = 1; + + for (int i = 0; i < batch.size; i++) { + if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { + long t = ((LongColumnVector) batch.cols[in]).vector[i]; + long y = ((LongColumnVector) batch.cols[out]).vector[i]; + compareToUDFWeekOfYearDate(t, (int) y); + } else { + Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); + } + } + } + + @Test + public void testVectorUDFWeekOfYear() { + VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + verifyUDFWeekOfYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFWeekOfYear(batch); + + int[] boundaries = getAllBoundaries(); + batch = getVectorizedRowBatch(boundaries, boundaries.length); + verifyUDFWeekOfYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFWeekOfYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFWeekOfYear(batch); + + batch = getVectorizedRowBatch(new int[] {0}, 1); + batch.cols[0].isRepeating = true; + verifyUDFWeekOfYear(batch); + batch.cols[0].noNulls = false; + batch.cols[0].isNull[0] = true; + verifyUDFWeekOfYear(batch); + + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + verifyUDFWeekOfYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + verifyUDFWeekOfYear(batch); + TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); + verifyUDFWeekOfYear(batch); + } + + public static void main(String[] args) { + TestVectorDateExpressions self = new TestVectorDateExpressions(); + self.testVectorUDFYear(); + 
self.testVectorUDFMonth(); + self.testVectorUDFDayOfMonth(); + self.testVectorUDFWeekOfYear(); + self.testVectorUDFUnixTimeStamp(); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java new file mode 100644 index 0000000..849c9e8 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java @@ -0,0 +1,751 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.junit.Assert; +import org.junit.Test; + + +import java.io.UnsupportedEncodingException; +import java.sql.Date; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +public class TestVectorGenericDateExpressions { + private int size = 200; + private Random random = new Random(); + private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private List dateTimestampStringTypes = + Arrays.asList(VectorExpression.Type.DATE, VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING); + + private long newRandom(int i) { + return random.nextInt(i); + } + + private LongColumnVector newRandomLongColumnVector(int range, int size) { + LongColumnVector vector = new LongColumnVector(size); + for (int i = 0; i < size; i++) { + vector.vector[i] = random.nextInt(range); + } + return vector; + } + + private LongColumnVector toTimestamp(LongColumnVector date) { + LongColumnVector vector = new LongColumnVector(size); + for (int i = 0; i < size; i++) { + if (date.isNull[i]) { + vector.isNull[i] = true; + vector.noNulls = false; + } else { + vector.vector[i] = toTimestamp(date.vector[i]); + } + } + return vector; + } + + private long toTimestamp(long date) { + return DateWritable.daysToMillis((int) date) * 1000000; + } + + private BytesColumnVector toString(LongColumnVector date) { + BytesColumnVector bcv = new BytesColumnVector(size); + for (int i = 0; i < size; i++) { + if (date.isNull[i]) { + bcv.isNull[i] = true; + bcv.noNulls = false; + } else { + bcv.vector[i] = toString(date.vector[i]); + bcv.start[i] = 0; + bcv.length[i] = bcv.vector[i].length; + } + } + return bcv; + } + + private byte[] toString(long date) { + try { + String formatted = 
formatter.format(new Date(DateWritable.daysToMillis((int) date))); + return formatted.getBytes("UTF-8"); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private void validateDateAdd(VectorizedRowBatch batch, VectorExpression.Type colType1, long scalar2, + boolean isPositive, LongColumnVector date1) { + VectorUDFDateAddColScalar udf; + if (isPositive) { + udf = new VectorUDFDateAddColScalar(0, scalar2, 1); + } else { + udf = new VectorUDFDateSubColScalar(0, scalar2, 1); + } + udf.setInputTypes(colType1, VectorExpression.Type.OTHER); + udf.evaluate(batch); + BytesColumnVector output = (BytesColumnVector) batch.cols[1]; + + try { + for (int i = 0; i < size; i++) { + String expected; + if (isPositive) { + expected = new String(toString(date1.vector[i] + scalar2), "UTF-8"); + } else { + expected = new String(toString(date1.vector[i] - scalar2), "UTF-8"); + } + if (date1.isNull[i]) { + Assert.assertTrue(output.isNull[i]); + } else { + String actual = new String(output.vector[i], output.start[i], output.length[i], "UTF-8"); + Assert.assertEquals("expectedLen:" + expected.length() + " actualLen:" + actual.length(), expected, actual); + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private ColumnVector castTo(LongColumnVector date, VectorExpression.Type type) { + switch (type) { + case DATE: + return date; + + case TIMESTAMP: + return toTimestamp(date); + + case STRING: + return toString(date); + } + return null; + } + + private void testDateAddColScalar(VectorExpression.Type colType1, boolean isPositive) { + LongColumnVector date1 = newRandomLongColumnVector(10000, size); + ColumnVector col1 = castTo(date1, colType1); + long scalar2 = newRandom(1000); + BytesColumnVector output = new BytesColumnVector(size); + + VectorizedRowBatch batch = new VectorizedRowBatch(2, size); + batch.cols[0] = col1; + batch.cols[1] = output; + + validateDateAdd(batch, colType1, scalar2, isPositive, date1); + TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); + validateDateAdd(batch, colType1, scalar2, isPositive, date1); + } + + @Test + public void testDateAddColScalar() { + for (VectorExpression.Type colType1 : dateTimestampStringTypes) + testDateAddColScalar(colType1, true); + + VectorExpression udf = new VectorUDFDateAddColScalar(0, 0, 1); + udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); + VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); + batch.cols[0] = new BytesColumnVector(1); + batch.cols[1] = new BytesColumnVector(1); + BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; + byte[] bytes = new byte[0]; + try { + bytes = "error".getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + } + bcv.vector[0] = bytes; + bcv.start[0] = 0; + bcv.length[0] = bytes.length; + udf.evaluate(batch); + Assert.assertEquals(batch.cols[1].isNull[0], true); + } + + @Test + public void testDateSubColScalar() { + for (VectorExpression.Type colType1 : dateTimestampStringTypes) + testDateAddColScalar(colType1, false); + + VectorExpression udf = new VectorUDFDateSubColScalar(0, 0, 1); + udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); + VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); + batch.cols[0] = new BytesColumnVector(1); + batch.cols[1] = new BytesColumnVector(1); + BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; + byte[] bytes = new byte[0]; + try { + bytes = "error".getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + }
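+ // an unparseable date string should produce a null result rather than an exception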
bcv.vector[0] = bytes; + bcv.start[0] = 0; + bcv.length[0] = bytes.length; + udf.evaluate(batch); + Assert.assertEquals(batch.cols[1].isNull[0], true); + } + + private void validateDateAdd(VectorizedRowBatch batch, long scalar1, LongColumnVector date2, + VectorExpression.Type colType1, boolean isPositive) { + VectorExpression udf = null; + if (isPositive) { + switch (colType1) { + case DATE: + udf = new VectorUDFDateAddScalarCol(scalar1, 0, 1); + break; + case TIMESTAMP: + udf = new VectorUDFDateAddScalarCol(toTimestamp(scalar1), 0, 1); + break; + case STRING: + udf = new VectorUDFDateAddScalarCol(toString(scalar1), 0, 1); + break; + } + } else { + switch (colType1) { + case DATE: + udf = new VectorUDFDateSubScalarCol(scalar1, 0, 1); + break; + case TIMESTAMP: + udf = new VectorUDFDateSubScalarCol(toTimestamp(scalar1), 0, 1); + break; + case STRING: + udf = new VectorUDFDateSubScalarCol(toString(scalar1), 0, 1); + break; + } + } + udf.setInputTypes(colType1, VectorExpression.Type.OTHER); + udf.evaluate(batch); + + BytesColumnVector output = (BytesColumnVector) batch.cols[1]; + try { + for (int i = 0; i < date2.vector.length; i++) { + String expected; + if (isPositive) { + expected = new String(toString(scalar1 + date2.vector[i]), "UTF-8"); + } else { + expected = new String(toString(scalar1 - date2.vector[i]), "UTF-8"); + } + if (date2.isNull[i]) { + Assert.assertTrue(output.isNull[i]); + } else { + Assert.assertEquals(expected, + new String(output.vector[i], output.start[i], output.length[i], "UTF-8")); + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private void testDateAddScalarCol(VectorExpression.Type colType1, boolean isPositive) { + LongColumnVector date2 = newRandomLongColumnVector(10000, size); + long scalar1 = newRandom(1000); + + BytesColumnVector output = new BytesColumnVector(size); + + VectorizedRowBatch batch = new VectorizedRowBatch(2, size); + batch.cols[0] = date2; + batch.cols[1] = output; + validateDateAdd(batch, scalar1, date2, colType1, isPositive); + TestVectorizedRowBatch.addRandomNulls(date2); + batch.cols[0] = date2; + validateDateAdd(batch, scalar1, date2, colType1, isPositive); + } + + @Test + public void testDateAddScalarCol() { + for (VectorExpression.Type scalarType1 : dateTimestampStringTypes) + testDateAddScalarCol(scalarType1, true); + + VectorExpression udf = null; + try { + udf = new VectorUDFDateAddScalarCol("error".getBytes("UTF-8"), 0, 1); + } catch (UnsupportedEncodingException e) { + } + udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); + VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); + batch.cols[0] = new LongColumnVector(1); + batch.cols[1] = new BytesColumnVector(1); + udf.evaluate(batch); + Assert.assertEquals(batch.cols[1].isNull[0], true); + } + + @Test + public void testDateSubScalarCol() { + for (VectorExpression.Type scalarType1 : dateTimestampStringTypes) + testDateAddScalarCol(scalarType1, false); + + VectorExpression udf = null; + try { + udf = new VectorUDFDateSubScalarCol("error".getBytes("UTF-8"), 0, 1); + } catch (UnsupportedEncodingException e) { + } + udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); + VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); + batch.cols[0] = new LongColumnVector(1); + batch.cols[1] = new BytesColumnVector(1); + udf.evaluate(batch); + Assert.assertEquals(batch.cols[1].isNull[0], true); + } + + private void validateDateAdd(VectorizedRowBatch batch, + LongColumnVector date1,
+
+  private void validateDateAdd(VectorizedRowBatch batch, long scalar1, LongColumnVector date2,
+      VectorExpression.Type colType1, boolean isPositive) {
+    VectorExpression udf = null;
+    if (isPositive) {
+      switch (colType1) {
+        case DATE:
+          udf = new VectorUDFDateAddScalarCol(scalar1, 0, 1);
+          break;
+        case TIMESTAMP:
+          udf = new VectorUDFDateAddScalarCol(toTimestamp(scalar1), 0, 1);
+          break;
+        case STRING:
+          udf = new VectorUDFDateAddScalarCol(toString(scalar1), 0, 1);
+          break;
+      }
+    } else {
+      switch (colType1) {
+        case DATE:
+          udf = new VectorUDFDateSubScalarCol(scalar1, 0, 1);
+          break;
+        case TIMESTAMP:
+          udf = new VectorUDFDateSubScalarCol(toTimestamp(scalar1), 0, 1);
+          break;
+        case STRING:
+          udf = new VectorUDFDateSubScalarCol(toString(scalar1), 0, 1);
+          break;
+      }
+    }
+    udf.setInputTypes(colType1, VectorExpression.Type.OTHER);
+    udf.evaluate(batch);
+
+    BytesColumnVector output = (BytesColumnVector) batch.cols[1];
+    try {
+      for (int i = 0; i < date2.vector.length; i++) {
+        String expected;
+        if (isPositive) {
+          expected = new String(toString(scalar1 + date2.vector[i]), "UTF-8");
+        } else {
+          expected = new String(toString(scalar1 - date2.vector[i]), "UTF-8");
+        }
+        if (date2.isNull[i]) {
+          Assert.assertTrue(output.isNull[i]);
+        } else {
+          Assert.assertEquals(expected,
+              new String(output.vector[i], output.start[i], output.length[i], "UTF-8"));
+        }
+      }
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private void testDateAddScalarCol(VectorExpression.Type colType1, boolean isPositive) {
+    LongColumnVector date2 = newRandomLongColumnVector(10000, size);
+    long scalar1 = newRandom(1000);
+
+    BytesColumnVector output = new BytesColumnVector(size);
+
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+    batch.cols[0] = date2;
+    batch.cols[1] = output;
+    validateDateAdd(batch, scalar1, date2, colType1, isPositive);
+    TestVectorizedRowBatch.addRandomNulls(date2);
+    batch.cols[0] = date2;
+    validateDateAdd(batch, scalar1, date2, colType1, isPositive);
+  }
+
+  @Test
+  public void testDateAddScalarCol() {
+    for (VectorExpression.Type scalarType1 : dateTimestampStringTypes)
+      testDateAddScalarCol(scalarType1, true);
+
+    VectorExpression udf = null;
+    try {
+      udf = new VectorUDFDateAddScalarCol("error".getBytes("UTF-8"), 0, 1);
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+    udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
+    batch.cols[0] = new LongColumnVector(1);
+    batch.cols[1] = new BytesColumnVector(1);
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[1].isNull[0]);
+  }
+
+  @Test
+  public void testDateSubScalarCol() {
+    for (VectorExpression.Type scalarType1 : dateTimestampStringTypes)
+      testDateAddScalarCol(scalarType1, false);
+
+    VectorExpression udf = null;
+    try {
+      udf = new VectorUDFDateSubScalarCol("error".getBytes("UTF-8"), 0, 1);
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+    udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
+    batch.cols[0] = new LongColumnVector(1);
+    batch.cols[1] = new BytesColumnVector(1);
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[1].isNull[0]);
+  }
+
+  private void validateDateAdd(VectorizedRowBatch batch,
+      LongColumnVector date1, LongColumnVector date2,
+      VectorExpression.Type colType1, boolean isPositive) {
+    VectorExpression udf;
+    if (isPositive) {
+      udf = new VectorUDFDateAddColCol(0, 1, 2);
+    } else {
+      udf = new VectorUDFDateSubColCol(0, 1, 2);
+    }
+    udf.setInputTypes(colType1, VectorExpression.Type.OTHER);
+    udf.evaluate(batch);
+    BytesColumnVector output = (BytesColumnVector) batch.cols[2];
+    try {
+      for (int i = 0; i < date2.vector.length; i++) {
+        String expected;
+        if (isPositive) {
+          expected = new String(toString(date1.vector[i] + date2.vector[i]), "UTF-8");
+        } else {
+          expected = new String(toString(date1.vector[i] - date2.vector[i]), "UTF-8");
+        }
+        if (date1.isNull[i] || date2.isNull[i]) {
+          Assert.assertTrue(output.isNull[i]);
+        } else {
+          Assert.assertEquals(expected,
+              new String(output.vector[i], output.start[i], output.length[i], "UTF-8"));
+        }
+      }
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private void testDateAddColCol(VectorExpression.Type colType1, boolean isPositive) {
+    LongColumnVector date1 = newRandomLongColumnVector(10000, size);
+    LongColumnVector days2 = newRandomLongColumnVector(1000, size);
+    ColumnVector col1 = castTo(date1, colType1);
+
+    BytesColumnVector output = new BytesColumnVector(size);
+
+    VectorizedRowBatch batch = new VectorizedRowBatch(3, size);
+    batch.cols[0] = col1;
+    batch.cols[1] = days2;
+    batch.cols[2] = output;
+
+    validateDateAdd(batch, date1, days2, colType1, isPositive);
+    TestVectorizedRowBatch.addRandomNulls(date1);
+    batch.cols[0] = castTo(date1, colType1);
+    validateDateAdd(batch, date1, days2, colType1, isPositive);
+    TestVectorizedRowBatch.addRandomNulls(days2);
+    batch.cols[1] = days2;
+    validateDateAdd(batch, date1, days2, colType1, isPositive);
+  }
+
+  @Test
+  public void testDateAddColCol() {
+    for (VectorExpression.Type colType1 : dateTimestampStringTypes)
+      testDateAddColCol(colType1, true);
+
+    VectorExpression udf = new VectorUDFDateAddColCol(0, 1, 2);
+    VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
+    BytesColumnVector bcv;
+    byte[] bytes = new byte[0];
+    try {
+      bytes = "error".getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+
+    udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);
+    batch.cols[0] = new BytesColumnVector(1);
+    batch.cols[1] = new LongColumnVector(1);
+    batch.cols[2] = new BytesColumnVector(1);
+    bcv = (BytesColumnVector) batch.cols[0];
+    bcv.vector[0] = bytes;
+    bcv.start[0] = 0;
+    bcv.length[0] = bytes.length;
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[2].isNull[0]);
+  }
+
+  @Test
+  public void testDateSubColCol() {
+    for (VectorExpression.Type colType1 : dateTimestampStringTypes)
+      testDateAddColCol(colType1, false);
+
+    VectorExpression udf = new VectorUDFDateSubColCol(0, 1, 2);
+    VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
+    BytesColumnVector bcv;
+    byte[] bytes = new byte[0];
+    try {
+      bytes = "error".getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+
+    udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);
+    batch.cols[0] = new BytesColumnVector(1);
+    batch.cols[1] = new LongColumnVector(1);
+    batch.cols[2] = new BytesColumnVector(1);
+    bcv = (BytesColumnVector) batch.cols[0];
+    bcv.vector[0] = bytes;
+    bcv.start[0] = 0;
+    bcv.length[0] = bytes.length;
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[2].isNull[0]);
+  }
+
+  private void validateDateDiff(VectorizedRowBatch batch, long scalar1,
+      VectorExpression.Type scalarType1, VectorExpression.Type colType2,
+      LongColumnVector date2) {
+    VectorExpression udf = null;
+    switch (scalarType1) {
+      case DATE:
+        udf = new VectorUDFDateDiffScalarCol(scalar1, 0, 1);
+        break;
+
+      case TIMESTAMP:
+        udf = new VectorUDFDateDiffScalarCol(toTimestamp(scalar1), 0, 1);
+        break;
+
+      case STRING:
+        udf = new VectorUDFDateDiffScalarCol(toString(scalar1), 0, 1);
+        break;
+    }
+
+    udf.setInputTypes(scalarType1, colType2);
+    udf.evaluate(batch);
+
+    LongColumnVector output = (LongColumnVector) batch.cols[1];
+    for (int i = 0; i < date2.vector.length; i++) {
+      Assert.assertEquals(scalar1 - date2.vector[i], output.vector[i]);
+    }
+  }
+
+  @Test
+  public void testDateDiffScalarCol() {
+    for (VectorExpression.Type scalarType1 : dateTimestampStringTypes) {
+      for (VectorExpression.Type colType2 : dateTimestampStringTypes) {
+        LongColumnVector date2 = newRandomLongColumnVector(10000, size);
+        LongColumnVector output = new LongColumnVector(size);
+        ColumnVector col2 = castTo(date2, colType2);
+        VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+        batch.cols[0] = col2;
+        batch.cols[1] = output;
+        long scalar1 = newRandom(1000);
+
+        validateDateDiff(batch, scalar1, scalarType1, colType2, date2);
+        TestVectorizedRowBatch.addRandomNulls(date2);
+        batch.cols[0] = castTo(date2, colType2);
+        validateDateDiff(batch, scalar1, scalarType1, colType2, date2);
+      }
+    }
+
+    VectorExpression udf;
+    byte[] bytes = new byte[0];
+    try {
+      bytes = "error".getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
+
+    udf = new VectorUDFDateDiffScalarCol(0, 0, 1);
+    udf.setInputTypes(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING);
+    batch.cols[0] = new BytesColumnVector(1);
+    batch.cols[1] = new LongColumnVector(1);
+
+    BytesColumnVector bcv = (BytesColumnVector) batch.cols[0];
+    bcv.vector[0] = bytes;
+    bcv.start[0] = 0;
+    bcv.length[0] = bytes.length;
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[1].isNull[0]);
+
+    udf = new VectorUDFDateDiffScalarCol(bytes, 0, 1);
+    udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);
+    batch.cols[0] = new LongColumnVector(1);
+    batch.cols[1] = new LongColumnVector(1);
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[1].isNull[0]);
+  }
+
+  private void validateDateDiff(VectorizedRowBatch batch, LongColumnVector date1, long scalar2,
+      VectorExpression.Type colType1, VectorExpression.Type scalarType2) {
+    VectorExpression udf = null;
+    switch (scalarType2) {
+      case DATE:
+        udf = new VectorUDFDateDiffColScalar(0, scalar2, 1);
+        break;
+
+      case TIMESTAMP:
+        udf = new VectorUDFDateDiffColScalar(0, toTimestamp(scalar2), 1);
+        break;
+
+      case STRING:
+        udf = new VectorUDFDateDiffColScalar(0, toString(scalar2), 1);
+        break;
+    }
+
+    udf.setInputTypes(colType1, scalarType2);
+    udf.evaluate(batch);
+
+    LongColumnVector output = (LongColumnVector) batch.cols[1];
+    for (int i = 0; i < date1.vector.length; i++) {
+      Assert.assertEquals(date1.vector[i] - scalar2, output.vector[i]);
+    }
+  }
+
+  @Test
+  public void testDateDiffColScalar() {
+    for (VectorExpression.Type colType1 : dateTimestampStringTypes) {
+      for (VectorExpression.Type scalarType2 : dateTimestampStringTypes) {
+        LongColumnVector date1 = newRandomLongColumnVector(10000, size);
+        LongColumnVector output = new LongColumnVector(size);
+        VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+        batch.cols[0] = castTo(date1, colType1);
+        batch.cols[1] = output;
+        long scalar2 = newRandom(1000);
+
+        validateDateDiff(batch, date1, scalar2, colType1, scalarType2);
+        TestVectorizedRowBatch.addRandomNulls(date1);
+        batch.cols[0] = castTo(date1, colType1);
+        validateDateDiff(batch, date1, scalar2, colType1, scalarType2);
+      }
+    }
+    VectorExpression udf;
+    byte[] bytes = new byte[0];
+    try {
+      bytes = "error".getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
+
+    udf = new VectorUDFDateDiffColScalar(0, 0, 1);
+    udf.setInputTypes(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING);
+    batch.cols[0] = new BytesColumnVector(1);
+    batch.cols[1] = new LongColumnVector(1);
+
+    BytesColumnVector bcv = (BytesColumnVector) batch.cols[0];
+    bcv.vector[0] = bytes;
+    bcv.start[0] = 0;
+    bcv.length[0] = bytes.length;
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[1].isNull[0]);
+
+    udf = new VectorUDFDateDiffColScalar(0, bytes, 1);
+    udf.setInputTypes(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING);
+    batch.cols[0] = new LongColumnVector(1);
+    batch.cols[1] = new LongColumnVector(1);
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[1].isNull[0]);
+  }
+
+  private void validateDateDiff(VectorizedRowBatch batch,
+      LongColumnVector date1, LongColumnVector date2,
+      VectorExpression.Type colType1, VectorExpression.Type colType2) {
+    VectorExpression udf = new VectorUDFDateDiffColCol(0, 1, 2);
+    udf.setInputTypes(colType1, colType2);
+    udf.evaluate(batch);
+    LongColumnVector output = (LongColumnVector) batch.cols[2];
+    for (int i = 0; i < date1.vector.length; i++) {
+      if (date1.isNull[i] || date2.isNull[i]) {
+        Assert.assertTrue(output.isNull[i]);
+      } else {
+        Assert.assertEquals(date1.vector[i] - date2.vector[i], output.vector[i]);
+      }
+    }
+  }
+
+  @Test
+  public void testDateDiffColCol() {
+    for (VectorExpression.Type colType1 : dateTimestampStringTypes) {
+      for (VectorExpression.Type colType2 : dateTimestampStringTypes) {
+        LongColumnVector date1 = newRandomLongColumnVector(10000, size);
+        LongColumnVector date2 = newRandomLongColumnVector(10000, size);
+        LongColumnVector output = new LongColumnVector(size);
+        VectorizedRowBatch batch = new VectorizedRowBatch(3, size);
+
+        batch.cols[0] = castTo(date1, colType1);
+        batch.cols[1] = castTo(date2, colType2);
+        batch.cols[2] = output;
+
+        validateDateDiff(batch, date1, date2, colType1, colType2);
+        TestVectorizedRowBatch.addRandomNulls(date1);
+        batch.cols[0] = castTo(date1, colType1);
+        validateDateDiff(batch, date1, date2, colType1, colType2);
+        TestVectorizedRowBatch.addRandomNulls(date2);
+        batch.cols[1] = castTo(date2, colType2);
+        validateDateDiff(batch, date1, date2, colType1, colType2);
+      }
+    }
+
+    VectorExpression udf = new VectorUDFDateDiffColCol(0, 1, 2);
+    VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
+    BytesColumnVector bcv;
+    byte[] bytes = new byte[0];
+    try {
+      bytes = "error".getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+
+    udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP);
+    batch.cols[0] = new BytesColumnVector(1);
+    batch.cols[1] = new LongColumnVector(1);
+    batch.cols[2] = new LongColumnVector(1);
+    bcv = (BytesColumnVector) batch.cols[0];
+    bcv.vector[0] = bytes;
+    bcv.start[0] = 0;
+    bcv.length[0] = bytes.length;
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[2].isNull[0]);
+
+    udf.setInputTypes(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING);
+    batch.cols[0] = new LongColumnVector(1);
+    batch.cols[1] = new BytesColumnVector(1);
+    batch.cols[2] = new LongColumnVector(1);
+    bcv = (BytesColumnVector) batch.cols[1];
+    bcv.vector[0] = bytes;
+    bcv.start[0] = 0;
+    bcv.length[0] = bytes.length;
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[2].isNull[0]);
+  }
+
+  private void validateDate(VectorizedRowBatch batch, VectorExpression.Type colType, LongColumnVector date) {
+    VectorExpression udf;
+    if (colType == VectorExpression.Type.STRING) {
+      udf = new VectorUDFDateString(0, 1);
+    } else {
+      udf = new VectorUDFDateLong(0, 1);
+    }
+
+    udf.setInputTypes(colType);
+    udf.evaluate(batch);
+    BytesColumnVector output = (BytesColumnVector) batch.cols[1];
+
+    for (int i = 0; i < size; i++) {
+      String actual;
+      if (output.isNull[i]) {
+        actual = null;
+      } else {
+        try {
+          actual = new String(output.vector[i], output.start[i], output.length[i], "UTF-8");
+        } catch (UnsupportedEncodingException e) {
+          throw new RuntimeException(e);
+        }
+      }
+      if (date.isNull[i]) {
+        Assert.assertTrue(output.isNull[i]);
+      } else {
+        String expected = formatter.format(new Date(DateWritable.daysToMillis((int) date.vector[i])));
+        Assert.assertEquals(expected, actual);
+      }
+    }
+  }
+
+  @Test
+  public void testDate() {
+    for (VectorExpression.Type colType : dateTimestampStringTypes) {
+      LongColumnVector date = newRandomLongColumnVector(10000, size);
+      BytesColumnVector output = new BytesColumnVector(size);
+
+      VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+      batch.cols[0] = castTo(date, colType);
+      batch.cols[1] = output;
+
+      validateDate(batch, colType, date);
+      TestVectorizedRowBatch.addRandomNulls(date);
+      batch.cols[0] = castTo(date, colType);
+      validateDate(batch, colType, date);
+    }
+
+    VectorExpression udf = new VectorUDFDateString(0, 1);
+    udf.setInputTypes(VectorExpression.Type.STRING);
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
+    batch.cols[0] = new BytesColumnVector(1);
+    batch.cols[1] = new BytesColumnVector(1);
+    BytesColumnVector bcv = (BytesColumnVector) batch.cols[0];
+    byte[] bytes = new byte[0];
+    try {
+      bytes = "error".getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+    bcv.vector[0] = bytes;
+    bcv.start[0] = 0;
+    bcv.length[0] = bytes.length;
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[1].isNull[0]);
+  }
+
+  private void validateToDate(VectorizedRowBatch batch, VectorExpression.Type colType, LongColumnVector date) {
+    VectorExpression udf;
+    if (colType == VectorExpression.Type.STRING) {
+      udf = new CastStringToDate(0, 1);
+    } else {
+      udf = new CastLongToDate(0, 1);
+    }
+    udf.setInputTypes(colType);
+    udf.evaluate(batch);
+    LongColumnVector output = (LongColumnVector) batch.cols[1];
+
+    for (int i = 0; i < size; i++) {
+      long actual = output.vector[i];
+      if (date.isNull[i]) {
+        Assert.assertTrue(output.isNull[i]);
+      } else {
+        long expected = date.vector[i];
+        Assert.assertEquals(expected, actual);
+      }
+    }
+  }
+
+  @Test
+  public void testToDate() {
+    for (VectorExpression.Type type :
+        Arrays.asList(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING)) {
+      LongColumnVector date = newRandomLongColumnVector(10000, size);
+      LongColumnVector output = new LongColumnVector(size);
+
+      VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+      batch.cols[0] = castTo(date, type);
+      batch.cols[1] = output;
+
+      validateToDate(batch, type, date);
+      TestVectorizedRowBatch.addRandomNulls(date);
+      batch.cols[0] = castTo(date, type);
+      validateToDate(batch, type, date);
+    }
+
+    VectorExpression udf = new CastStringToDate(0, 1);
+    udf.setInputTypes(VectorExpression.Type.STRING);
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
+    batch.cols[0] = new BytesColumnVector(1);
+    batch.cols[1] = new LongColumnVector(1);
+    BytesColumnVector bcv = (BytesColumnVector) batch.cols[0];
+    byte[] bytes = new byte[0];
+    try {
+      bytes = "error".getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+    bcv.vector[0] = bytes;
+    bcv.start[0] = 0;
+    bcv.length[0] = bytes.length;
+    udf.evaluate(batch);
+    Assert.assertTrue(batch.cols[1].isNull[0]);
+  }
+}
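For reference: the expected values above reduce to epoch-day arithmetic. A DATE rides through the vectorized path as a long day count since 1970-01-01 in a LongColumnVector, and the string outputs come from DateWritable.daysToMillis() plus a yyyy-MM-dd formatter, mirroring the toString(long) helper in TestVectorDateExpressions. A minimal standalone sketch of that conversion (assuming daysToMillis returns local-midnight milliseconds, which its pairing with a date formatter above implies):

import java.sql.Date;
import java.text.SimpleDateFormat;
import org.apache.hadoop.hive.serde2.io.DateWritable;

public class EpochDayRoundTrip {
  public static void main(String[] args) {
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
    int days = 14902;  // 2010-10-20 expressed as days since 1970-01-01
    // day count -> printable date string, as the test's toString(long) helper does
    System.out.println(fmt.format(new Date(DateWritable.daysToMillis(days))));      // 2010-10-20
    // date_add/date_sub are plain integer arithmetic on the day count
    System.out.println(fmt.format(new Date(DateWritable.daysToMillis(days + 2))));  // 2010-10-22
    System.out.println(fmt.format(new Date(DateWritable.daysToMillis(days - 2))));  // 2010-10-18
  }
}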
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java
index 41fa02c..419254b 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java
@@ -237,8 +237,10 @@ private void verifyUDFYear(VectorizedRowBatch batch, TestType testType) {
     VectorExpression udf = null;
     if (testType == TestType.LONG2) {
       udf = new VectorUDFYearLong(0, 1);
+      udf.setInputTypes(VectorExpression.Type.TIMESTAMP);
     } else {
       udf = new VectorUDFYearString(0, 1);
+      udf.setInputTypes(VectorExpression.Type.STRING);
     }
     udf.evaluate(batch);
     final int in = 0;
@@ -318,8 +320,10 @@ private void verifyUDFDayOfMonth(VectorizedRowBatch batch, TestType testType) {
     VectorExpression udf = null;
     if (testType == TestType.LONG2) {
       udf = new VectorUDFDayOfMonthLong(0, 1);
+      udf.setInputTypes(VectorExpression.Type.TIMESTAMP);
     } else {
       udf = new VectorUDFDayOfMonthString(0, 1);
+      udf.setInputTypes(VectorExpression.Type.STRING);
     }
     udf.evaluate(batch);
     final int in = 0;
@@ -392,8 +396,10 @@ private void verifyUDFHour(VectorizedRowBatch batch, TestType testType) {
     VectorExpression udf = null;
     if (testType == TestType.LONG2) {
       udf = new VectorUDFHourLong(0, 1);
+      udf.setInputTypes(VectorExpression.Type.TIMESTAMP);
     } else {
       udf = new VectorUDFHourString(0, 1);
+      udf.setInputTypes(VectorExpression.Type.STRING);
     }
     udf.evaluate(batch);
     final int in = 0;
@@ -466,8 +472,10 @@ private void verifyUDFMinute(VectorizedRowBatch batch, TestType testType) {
     VectorExpression udf = null;
     if (testType == TestType.LONG2) {
       udf = new VectorUDFMinuteLong(0, 1);
+      udf.setInputTypes(VectorExpression.Type.TIMESTAMP);
     } else {
       udf = new VectorUDFMinuteString(0, 1);
+      udf.setInputTypes(VectorExpression.Type.STRING);
     }
     udf.evaluate(batch);
     final int in = 0;
@@ -540,8 +548,10 @@ private void verifyUDFMonth(VectorizedRowBatch batch, TestType testType) {
     VectorExpression udf;
     if (testType == TestType.LONG2) {
       udf = new VectorUDFMonthLong(0, 1);
+      udf.setInputTypes(VectorExpression.Type.TIMESTAMP);
     } else {
       udf = new VectorUDFMonthString(0, 1);
+      udf.setInputTypes(VectorExpression.Type.STRING);
     }
     udf.evaluate(batch);
     final int in = 0;
@@ -614,8 +624,10 @@ private void verifyUDFSecond(VectorizedRowBatch batch, TestType testType) {
     VectorExpression udf;
     if (testType == TestType.LONG2) {
       udf = new VectorUDFSecondLong(0, 1);
+      udf.setInputTypes(VectorExpression.Type.TIMESTAMP);
     } else {
       udf = new VectorUDFSecondString(0, 1);
+      udf.setInputTypes(VectorExpression.Type.STRING);
     }
     udf.evaluate(batch);
     final int in = 0;
@@ -702,8 +714,10 @@ private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch, TestType testType)
     VectorExpression udf;
     if (testType == TestType.LONG2) {
       udf = new VectorUDFUnixTimeStampLong(0, 1);
+      udf.setInputTypes(VectorExpression.Type.TIMESTAMP);
     } else {
       udf = new VectorUDFUnixTimeStampString(0, 1);
+      udf.setInputTypes(VectorExpression.Type.STRING);
     }
     udf.evaluate(batch);
     final int in = 0;
@@ -776,8 +790,10 @@ private void verifyUDFWeekOfYear(VectorizedRowBatch batch, TestType testType) {
     VectorExpression udf;
     if (testType == TestType.LONG2) {
       udf = new VectorUDFWeekOfYearLong(0, 1);
+      udf.setInputTypes(VectorExpression.Type.TIMESTAMP);
     } else {
       udf = new VectorUDFWeekOfYearString(0, 1);
+      udf.setInputTypes(VectorExpression.Type.STRING);
     }
     udf.evaluate(batch);
     final int in = 0;
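The q-file that follows exercises the same functions end-to-end, and its golden output is easy to spot-check by hand: from 2000-01-01 to 2010-10-20 is ten 365-day years plus three leap days (2000, 2004, 2008) up to 2010-01-01, i.e. 3,653 days, plus 292 days into 2010, giving the 3945 that datediff(fl_date, "2000-01-01") produces below. A throwaway verification in plain JDK 8 (java.time is independent of the Hive classes under test):

import java.time.LocalDate;
import java.time.temporal.ChronoUnit;

public class DateDiffSpotCheck {
  public static void main(String[] args) {
    LocalDate epoch2000 = LocalDate.parse("2000-01-01");
    LocalDate flDate = LocalDate.parse("2010-10-20");
    // the same day arithmetic the vectorized datediff performs on epoch-day longs
    System.out.println(ChronoUnit.DAYS.between(epoch2000, flDate));  // 3945
  }
}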
datediff(fl_date, "2000-01-01") +FROM date_udf_flight_orc; diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out new file mode 100644 index 0000000..a9d7dde --- /dev/null +++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -0,0 +1,886 @@ +PREHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. + +CREATE TABLE date_udf_flight ( + origin_city_name STRING, + dest_city_name STRING, + fl_date DATE, + arr_delay FLOAT, + fl_num INT +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. + +CREATE TABLE date_udf_flight ( + origin_city_name STRING, + dest_city_name STRING, + fl_date DATE, + arr_delay FLOAT, + fl_num INT +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_udf_flight +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/flights_tiny.txt.1' OVERWRITE INTO TABLE date_udf_flight +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@date_udf_flight +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/flights_tiny.txt.1' OVERWRITE INTO TABLE date_udf_flight +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@date_udf_flight +PREHOOK: query: CREATE TABLE date_udf_flight_orc ( + fl_date DATE, + fl_time TIMESTAMP +) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE date_udf_flight_orc ( + fl_date DATE, + fl_time TIMESTAMP +) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_udf_flight_orc +PREHOOK: query: INSERT INTO TABLE date_udf_flight_orc SELECT fl_date, to_utc_timestamp(fl_date, 'America/Los_Angeles') FROM date_udf_flight +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight +PREHOOK: Output: default@date_udf_flight_orc +POSTHOOK: query: INSERT INTO TABLE date_udf_flight_orc SELECT fl_date, to_utc_timestamp(fl_date, 'America/Los_Angeles') FROM date_udf_flight +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight +POSTHOOK: Output: default@date_udf_flight_orc +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +PREHOOK: query: SELECT * FROM date_udf_flight_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 
07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-31 2010-10-31 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-22 2010-10-22 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-20 2010-10-20 
07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-30 2010-10-30 07:00:00 +2010-10-20 2010-10-20 07:00:00 +2010-10-21 2010-10-21 07:00:00 +2010-10-23 2010-10-23 07:00:00 +2010-10-24 2010-10-24 07:00:00 +2010-10-25 2010-10-25 07:00:00 +2010-10-26 2010-10-26 07:00:00 +2010-10-27 2010-10-27 07:00:00 +2010-10-28 2010-10-28 07:00:00 +2010-10-29 2010-10-29 07:00:00 +2010-10-31 2010-10-31 07:00:00 +PREHOOK: query: EXPLAIN SELECT + to_unix_timestamp(fl_time), + year(fl_time), + month(fl_time), + day(fl_time), + dayofmonth(fl_time), + weekofyear(fl_time), + date(fl_time), + to_date(fl_time), + date_add(fl_time, 2), + date_sub(fl_time, 2), + datediff(fl_time, "2000-01-01") +FROM date_udf_flight_orc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + to_unix_timestamp(fl_time), + year(fl_time), + month(fl_time), + day(fl_time), + dayofmonth(fl_time), + weekofyear(fl_time), + date(fl_time), + to_date(fl_time), + date_add(fl_time, 2), + date_sub(fl_time, 2), + datediff(fl_time, "2000-01-01") +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: string), date_add(fl_time, 2) (type: string), date_sub(fl_time, 2) (type: string), datediff(fl_time, '2000-01-01') (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: SELECT + to_unix_timestamp(fl_time), + year(fl_time), + month(fl_time), + day(fl_time), + dayofmonth(fl_time), + weekofyear(fl_time), + date(fl_time), + to_date(fl_time), + date_add(fl_time, 2), + date_sub(fl_time, 2), + datediff(fl_time, "2000-01-01") +FROM date_udf_flight_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + to_unix_timestamp(fl_time), + year(fl_time), + month(fl_time), + day(fl_time), + dayofmonth(fl_time), + weekofyear(fl_time), + date(fl_time), + to_date(fl_time), + date_add(fl_time, 2), + date_sub(fl_time, 2), + datediff(fl_time, "2000-01-01") +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: 
date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 
+1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288533600 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288533600 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288533600 2010 10 31 31 43 2010-10-31 
2010-10-31 2010-11-02 2010-10-29 3956 +1288533600 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288533600 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288533600 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288533600 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287756000 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288447200 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1287583200 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287669600 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287842400 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287928800 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1288015200 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288101600 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 
+1288188000 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288274400 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288360800 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288533600 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +PREHOOK: query: EXPLAIN SELECT + to_unix_timestamp(fl_date), + year(fl_date), + month(fl_date), + day(fl_date), + dayofmonth(fl_date), + weekofyear(fl_date), + date(fl_date), + to_date(fl_date), + date_add(fl_date, 2), + date_sub(fl_date, 2), + datediff(fl_date, "2000-01-01") +FROM date_udf_flight_orc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + to_unix_timestamp(fl_date), + year(fl_date), + month(fl_date), + day(fl_date), + dayofmonth(fl_date), + weekofyear(fl_date), + date(fl_date), + to_date(fl_date), + date_add(fl_date, 2), + date_sub(fl_date, 2), + datediff(fl_date, "2000-01-01") +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), weekofyear(fl_date) (type: int), CAST( fl_date AS DATE) (type: date), to_date(fl_date) (type: string), date_add(fl_date, 2) (type: string), date_sub(fl_date, 2) (type: string), datediff(fl_date, '2000-01-01') (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: SELECT + to_unix_timestamp(fl_date), + year(fl_date), + month(fl_date), + day(fl_date), + dayofmonth(fl_date), + weekofyear(fl_date), + date(fl_date), + to_date(fl_date), + date_add(fl_date, 2), + date_sub(fl_date, 2), + datediff(fl_date, "2000-01-01") +FROM date_udf_flight_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + to_unix_timestamp(fl_date), + year(fl_date), + month(fl_date), + day(fl_date), + dayofmonth(fl_date), + weekofyear(fl_date), + date(fl_date), + to_date(fl_date), + date_add(fl_date, 2), + date_sub(fl_date, 2), + datediff(fl_date, "2000-01-01") +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION 
[(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287990000 2010 10 25 25 43 
2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288508400 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288508400 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288508400 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288508400 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288508400 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 
3956 +1288508400 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288508400 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288335600 2010 10 29 29 43 2010-10-29 2010-10-29 2010-10-31 2010-10-27 3954 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287730800 2010 10 22 22 42 2010-10-22 2010-10-22 2010-10-24 2010-10-20 3947 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1288422000 2010 10 30 30 43 2010-10-30 2010-10-30 2010-11-01 2010-10-28 3955 +1287558000 2010 10 20 20 42 2010-10-20 2010-10-20 2010-10-22 2010-10-18 3945 +1287644400 2010 10 21 21 42 2010-10-21 2010-10-21 2010-10-23 2010-10-19 3946 +1287817200 2010 10 23 23 42 2010-10-23 2010-10-23 2010-10-25 2010-10-21 3948 +1287903600 2010 10 24 24 42 2010-10-24 2010-10-24 2010-10-26 2010-10-22 3949 +1287990000 2010 10 25 25 43 2010-10-25 2010-10-25 2010-10-27 2010-10-23 3950 +1288076400 2010 10 26 26 43 2010-10-26 2010-10-26 2010-10-28 2010-10-24 3951 +1288162800 2010 10 27 27 43 2010-10-27 2010-10-27 2010-10-29 2010-10-25 3952 +1288249200 2010 10 28 28 43 2010-10-28 2010-10-28 2010-10-30 2010-10-26 3953 +1288335600 2010 10 29 29 43 2010-10-29 
2010-10-29 2010-10-31 2010-10-27 3954 +1288508400 2010 10 31 31 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 +PREHOOK: query: EXPLAIN SELECT + year(fl_time) = year(fl_date), + month(fl_time) = month(fl_date), + day(fl_time) = day(fl_date), + dayofmonth(fl_time) = dayofmonth(fl_date), + weekofyear(fl_time) = weekofyear(fl_date), + date(fl_time) = date(fl_date), + to_date(fl_time) = to_date(fl_date), + date_add(fl_time, 2) = date_add(fl_date, 2), + date_sub(fl_time, 2) = date_sub(fl_date, 2), + datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01") +FROM date_udf_flight_orc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + year(fl_time) = year(fl_date), + month(fl_time) = month(fl_date), + day(fl_time) = day(fl_date), + dayofmonth(fl_time) = dayofmonth(fl_date), + weekofyear(fl_time) = weekofyear(fl_date), + date(fl_time) = date(fl_date), + to_date(fl_time) = to_date(fl_date), + date_add(fl_time, 2) = date_add(fl_date, 2), + date_sub(fl_time, 2) = date_sub(fl_date, 2), + datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01") +FROM date_udf_flight_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = CAST( fl_date AS DATE)) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: -- Should all be true or NULL +SELECT + year(fl_time) = year(fl_date), + month(fl_time) = month(fl_date), + day(fl_time) = day(fl_date), + dayofmonth(fl_time) = dayofmonth(fl_date), + weekofyear(fl_time) = weekofyear(fl_date), + date(fl_time) = date(fl_date), + to_date(fl_time) = to_date(fl_date), + date_add(fl_time, 2) = date_add(fl_date, 2), + date_sub(fl_time, 2) = date_sub(fl_date, 2), + datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01") +FROM date_udf_flight_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: query: -- Should all be true or 
+SELECT
+  year(fl_time) = year(fl_date),
+  month(fl_time) = month(fl_date),
+  day(fl_time) = day(fl_date),
+  dayofmonth(fl_time) = dayofmonth(fl_date),
+  weekofyear(fl_time) = weekofyear(fl_date),
+  date(fl_time) = date(fl_date),
+  to_date(fl_time) = to_date(fl_date),
+  date_add(fl_time, 2) = date_add(fl_date, 2),
+  date_sub(fl_time, 2) = date_sub(fl_date, 2),
+  datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01")
+FROM date_udf_flight_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@date_udf_flight_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: date_udf_flight_orc.fl_date SIMPLE [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ]
+POSTHOOK: Lineage: date_udf_flight_orc.fl_time EXPRESSION [(date_udf_flight)date_udf_flight.FieldSchema(name:fl_date, type:date, comment:null), ]
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true
+true true true true true true true true true true