diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java new file mode 100644 index 0000000..43110c5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Expression to get day of month. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFDayOfMonthString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFDayOfMonthString(int colNum, int outputColumn) { + super(colNum, outputColumn, 8, 2); + } + + public VectorUDFDayOfMonthString() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java new file mode 100644 index 0000000..066d548 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Returns hour of day. 
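+ * For example, in "2009-07-30 12:58:59" the hour field occupies characters 11-12, yielding 12.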
+ * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFHourString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFHourString(int colNum, int outputColumn) { + super(colNum, outputColumn, 11, 2); + } + + public VectorUDFHourString() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java new file mode 100644 index 0000000..3324c3f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Returns minute value. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFMinuteString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFMinuteString(int colNum, int outputColumn) { + super(colNum, outputColumn, 14, 2); + } + + public VectorUDFMinuteString() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java new file mode 100644 index 0000000..c2d3392 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Returns month value. 
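+ * For example, in "2009-07-30 12:58:59" the month field occupies characters 5-6, yielding 7.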
+ * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFMonthString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFMonthString(int colNum, int outputColumn) { + super(colNum, outputColumn, 5, 2); + } + + public VectorUDFMonthString() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java new file mode 100644 index 0000000..b6617ba --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Expression to get seconds. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFSecondString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFSecondString(int colNum, int outputColumn) { + super(colNum, outputColumn, 17, 2); + } + + public VectorUDFSecondString() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java new file mode 100644 index 0000000..011a790 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -0,0 +1,184 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+import java.text.ParseException;
+
+/**
+ * Abstract class to return various fields from a String.
+ */
+public abstract class VectorUDFTimestampFieldString extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int colNum;
+  protected int outputColumn;
+  protected final int fieldStart;
+  protected final int fieldLength;
+  private static final String patternMin = "0000-00-00 00:00:00.000000000";
+  private static final String patternMax = "9999-19-99 29:59:59.999999999";
+
+  public VectorUDFTimestampFieldString(int colNum, int outputColumn, int fieldStart, int fieldLength) {
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+    this.fieldStart = fieldStart;
+    this.fieldLength = fieldLength;
+  }
+
+  public VectorUDFTimestampFieldString() {
+    fieldStart = -1;
+    fieldLength = -1;
+  }
+
+  private long getField(byte[] bytes, int start, int length) throws ParseException {
+
+    // Validate: every character present must fall within the range allowed at its
+    // position, and the input must not be longer than the pattern itself.
+    for (int i = 0; i < length; i++) {
+      char ch = (char) bytes[start + i];
+      if (i >= patternMin.length()
+          || ch < patternMin.charAt(i) || ch > patternMax.charAt(i)) {
+        throw new ParseException("A timestamp string should match 'yyyy-MM-dd HH:mm:ss.fffffffff' pattern.", i);
+      }
+    }
+
+    return doGetField(bytes, start, length);
+  }
+
+  protected long doGetField(byte[] bytes, int start, int length) throws ParseException {
+    int field = 0;
+    if (length < fieldStart + fieldLength) {
+      throw new ParseException("A timestamp string should be longer.", 0);
+    }
+    for (int i = fieldStart; i < fieldStart + fieldLength; i++) {
+      byte ch = bytes[start + i];
+      field = 10 * field + (ch - '0');
+    }
+    return field;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    BytesColumnVector inputCol = (BytesColumnVector) batch.cols[this.colNum];
+
+    final int n = inputCol.isRepeating ? 1 : batch.size;
+    int[] sel = batch.selected;
+
+    if (batch.size == 0) {
+
+      // n != batch.size when isRepeating
+      return;
+    }
+
+    // true for all algebraic UDFs with no state
+    outV.isRepeating = inputCol.isRepeating;
+
+    if (inputCol.noNulls) {
+      outV.noNulls = true;
+      if (batch.selectedInUse) {
+        for (int j = 0; j < n; j++) {
+          int i = sel[j];
+          try {
+            outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
+            outV.isNull[i] = false;
+          } catch (ParseException e) {
+            outV.noNulls = false;
+            outV.isNull[i] = true;
+          }
+        }
+      } else {
+        for (int i = 0; i < n; i++) {
+          try {
+            outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
+            outV.isNull[i] = false;
+          } catch (ParseException e) {
+            outV.noNulls = false;
+            outV.isNull[i] = true;
+          }
+        }
+      }
+    } else {
+
+      // Handle case with nulls. Don't do function if the value is null, to save time,
+      // because calling the function can be expensive.
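+      // For example, with inputs {"2013-07-30 12:58:59", null} a minute expression
+      // produces {58, null}: noNulls becomes false and isNull mirrors the input.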
+ outV.noNulls = false; + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + try { + outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]); + } catch (ParseException e) { + outV.isNull[i] = true; + } + } + } + } else { + for (int i = 0; i < n; i++) { + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + try { + outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]); + } catch (ParseException e) { + outV.isNull[i] = true; + } + } + } + } + } + } + + @Override + public int getOutputColumn() { + return this.outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java new file mode 100644 index 0000000..16b4d0d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.io.Text; + +import java.nio.charset.CharacterCodingException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; + +/** + * Return Unix Timestamp. 
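+ * A minimal usage sketch (column numbers are illustrative), projecting
+ * column 0 into column 1 of a batch:
+ * <pre>
+ *   VectorExpression expr = new VectorUDFUnixTimeStampString(0, 1);
+ *   expr.evaluate(batch);   // batch.cols[1] now holds seconds since the epoch
+ * </pre>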
+ * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFUnixTimeStampString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + private transient final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + private transient final Calendar calendar = Calendar.getInstance(); + + public VectorUDFUnixTimeStampString(int colNum, int outputColumn) { + super(colNum, outputColumn, -1, -1); + } + + public VectorUDFUnixTimeStampString() { + super(); + } + + @Override + protected long doGetField(byte[] bytes, int start, int length) throws ParseException { + Date date = null; + try { + date = format.parse(Text.decode(bytes, start, length)); + } catch (CharacterCodingException e) { + throw new ParseException(e.getMessage(), 0); + } + calendar.setTime(date); + return calendar.getTimeInMillis() / 1000; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java new file mode 100644 index 0000000..cb1e6ca --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.io.Text; + +import java.nio.charset.CharacterCodingException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; + +/** + * Expression to get week of year. 
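+ * For example, "2008-02-20" falls in week 8 under the ISO week rules configured
+ * below (weeks start on Monday; the first week has at least 4 days).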
+ * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFWeekOfYearString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + private transient final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); + private transient final Calendar calendar = Calendar.getInstance(); + + public VectorUDFWeekOfYearString(int colNum, int outputColumn) { + super(colNum, outputColumn, -1, -1); + initCalendar(); + } + + public VectorUDFWeekOfYearString() { + super(); + initCalendar(); + } + + @Override + protected long doGetField(byte[] bytes, int start, int length) throws ParseException { + Date date = null; + try { + String decoded = Text.decode(bytes, start, length); + date = format.parse(decoded); + } catch (CharacterCodingException e) { + throw new ParseException(e.getMessage(), 0); + } + calendar.setTime(date); + return calendar.get(Calendar.WEEK_OF_YEAR); + } + + private void initCalendar() { + + // code copied over from UDFWeekOfYear implementation + calendar.setFirstDayOfWeek(Calendar.MONDAY); + calendar.setMinimalDaysInFirstWeek(4); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java new file mode 100644 index 0000000..69acb85 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Expression to get year as a long. 
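+ * For example, in "2009-07-30" the year field occupies characters 0-3, yielding 2009.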
+ * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFYearString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFYearString(int colNum, int outputColumn) { + super(colNum, outputColumn, 0, 4); + } + + public VectorUDFYearString() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 151c648..54138f7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -96,6 +96,7 @@ import org.apache.hadoop.hive.ql.udf.UDFLog10; import org.apache.hadoop.hive.ql.udf.UDFLog2; import org.apache.hadoop.hive.ql.udf.UDFMinute; +import org.apache.hadoop.hive.ql.udf.UDFMonth; import org.apache.hadoop.hive.ql.udf.UDFOPNegative; import org.apache.hadoop.hive.ql.udf.UDFOPPositive; import org.apache.hadoop.hive.ql.udf.UDFPower; @@ -196,15 +197,16 @@ public Vectorizer() { supportedGenericUDFs.add(GenericUDFOPOr.class); supportedGenericUDFs.add(GenericUDFOPAnd.class); supportedGenericUDFs.add(GenericUDFOPEqual.class); - supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class); + supportedGenericUDFs.add(UDFLength.class); + supportedGenericUDFs.add(UDFYear.class); + supportedGenericUDFs.add(UDFMonth.class); + supportedGenericUDFs.add(UDFDayOfMonth.class); supportedGenericUDFs.add(UDFHour.class); - supportedGenericUDFs.add(UDFLength.class); supportedGenericUDFs.add(UDFMinute.class); supportedGenericUDFs.add(UDFSecond.class); - supportedGenericUDFs.add(UDFYear.class); supportedGenericUDFs.add(UDFWeekOfYear.class); - supportedGenericUDFs.add(UDFDayOfMonth.class); + supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class); supportedGenericUDFs.add(UDFLike.class); supportedGenericUDFs.add(UDFRegExp.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java index 55eb81b..20add85 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthString; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; @@ -42,7 +43,7 @@ + "'yyyy-MM-dd'.\n" + "Example:\n " + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " 30") -@VectorizedExpressions({VectorUDFDayOfMonthLong.class}) +@VectorizedExpressions({VectorUDFDayOfMonthLong.class, VectorUDFDayOfMonthString.class}) public class UDFDayOfMonth extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private final Calendar calendar = Calendar.getInstance(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java index 912b266..155dc29 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourString; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -43,7 +44,7 @@ + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 12\n" + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 12") -@VectorizedExpressions({VectorUDFHourLong.class}) +@VectorizedExpressions({VectorUDFHourLong.class, VectorUDFHourString.class}) public class UDFHour extends UDF { private final SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private final SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java index 430d43c..5755adb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteString; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -43,7 +44,7 @@ + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 58\n" + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 58") -@VectorizedExpressions({VectorUDFMinuteLong.class}) +@VectorizedExpressions({VectorUDFMinuteLong.class, VectorUDFMinuteString.class}) public class UDFMinute extends UDF { private final SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private final SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java index 71d9513..4e34dbf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java @@ -25,6 +25,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthString; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; @@ -38,6 +41,7 @@ value = "_FUNC_(date) - Returns the month of date", extended = "Example:\n" + " > SELECT _FUNC_('2009-30-07') FROM src LIMIT 1;\n" + " 7") +@VectorizedExpressions({VectorUDFMonthLong.class, VectorUDFMonthString.class}) public class UDFMonth extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private final Calendar calendar = Calendar.getInstance(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java index d672cb6..0292931 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondString; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -43,7 +44,7 @@ + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 59\n" + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 59") -@VectorizedExpressions({VectorUDFSecondLong.class}) +@VectorizedExpressions({VectorUDFSecondLong.class, VectorUDFSecondString.class}) public class UDFSecond extends UDF { private final SimpleDateFormat formatter1 = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java index 224aa0c..f076d1d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearString; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; @@ -43,7 +44,7 @@ + " > SELECT _FUNC_('2008-02-20') FROM src LIMIT 1;\n" + " 8\n" + " > SELECT _FUNC_('1980-12-31 12:59:59') FROM src LIMIT 1;\n" + " 1") -@VectorizedExpressions({VectorUDFWeekOfYearLong.class}) +@VectorizedExpressions({VectorUDFWeekOfYearLong.class, VectorUDFWeekOfYearString.class}) public class UDFWeekOfYear extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private final Calendar calendar = Calendar.getInstance(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java index 9656a65..1853860 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearString; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; @@ -42,7 +43,7 @@ + "'yyyy-MM-dd'.\n" + "Example:\n " + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " 2009") -@VectorizedExpressions({VectorUDFYearLong.class}) +@VectorizedExpressions({VectorUDFYearLong.class, VectorUDFYearString.class}) public class UDFYear extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private final Calendar calendar = Calendar.getInstance(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java index d5c4527..dc259c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import 
org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampString; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; @@ -42,7 +43,7 @@ @Description(name = "to_unix_timestamp", value = "_FUNC_(date[, pattern]) - Returns the UNIX timestamp", extended = "Converts the specified time to number of seconds since 1970-01-01.") -@VectorizedExpressions({VectorUDFUnixTimeStampLong.class}) +@VectorizedExpressions({VectorUDFUnixTimeStampLong.class, VectorUDFUnixTimeStampString.class}) public class GenericUDFToUnixTimeStamp extends GenericUDF { private transient StringObjectInspector intputTextOI; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java index 33a4fe5..41fa02c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java @@ -18,15 +18,22 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; import java.sql.Timestamp; +import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Arrays; import java.util.Calendar; +import java.util.Date; import java.util.List; import java.util.Random; import junit.framework.Assert; import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -40,12 +47,14 @@ import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; import org.junit.Test; /** * Unit tests for timestamp expressions. 
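+ * Each field expression is exercised over both long (nanosecond timestamp) and
+ * string inputs; see the TestType enum below.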
  */
 public class TestVectorTimestampExpressions {
+  private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 
   /* copied over from VectorUDFTimestampFieldLong */
   private TimestampWritable toTimestampWritable(long nanos) {
@@ -97,6 +106,34 @@ private VectorizedRowBatch getVectorizedRandomRowBatchLong2(int seed, int size)
     return batch;
   }
 
+  private VectorizedRowBatch getVectorizedRandomRowBatchStringLong(int seed, int size) {
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+    BytesColumnVector bcv = new BytesColumnVector(size);
+    Random rand = new Random(seed);
+    for (int i = 0; i < size; i++) {
+
+      /* all 32-bit numbers qualify as whole seconds; use long arithmetic to
+       * multiply up to nanoseconds without overflowing int */
+      byte[] encoded = encodeTime(1000L * 1000 * 1000 * rand.nextInt());
+      bcv.vector[i] = encoded;
+      bcv.start[i] = 0;
+      bcv.length[i] = encoded.length;
+    }
+    batch.cols[0] = bcv;
+    batch.cols[1] = new LongColumnVector(size);
+    batch.size = size;
+    return batch;
+  }
+
+  private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size, TestType testType) {
+    switch (testType) {
+      case LONG2:
+        return getVectorizedRandomRowBatchLong2(seed, size);
+      case STRING_LONG:
+        return getVectorizedRandomRowBatchStringLong(seed, size);
+      default:
+        throw new IllegalArgumentException();
+    }
+  }
+
   /*
    * Input array is used to fill the entire size of the vector row batch
    */
@@ -112,7 +149,83 @@ private VectorizedRowBatch getVectorizedRowBatchLong2(long[] inputs, int size) {
     return batch;
   }
 
-  /*begin-macro*/
+  /*
+   * Input array is used to fill the entire size of the vector row batch
+   */
+  private VectorizedRowBatch getVectorizedRowBatchStringLong(long[] inputs, int size) {
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
+    BytesColumnVector bcv = new BytesColumnVector(size);
+    for (int i = 0; i < size; i++) {
+      byte[] encoded = encodeTime(inputs[i % inputs.length]);
+      bcv.vector[i] = encoded;
+      bcv.start[i] = 0;
+      bcv.length[i] = encoded.length;
+    }
+    batch.cols[0] = bcv;
+    batch.cols[1] = new LongColumnVector(size);
+    batch.size = size;
+    return batch;
+  }
+
+  private VectorizedRowBatch getVectorizedRowBatchStringLong(byte[] vector, int start, int length) {
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
+    BytesColumnVector bcv = new BytesColumnVector(1);
+
+    bcv.vector[0] = vector;
+    bcv.start[0] = start;
+    bcv.length[0] = length;
+
+    batch.cols[0] = bcv;
+    batch.cols[1] = new LongColumnVector(1);
+    batch.size = 1;
+    return batch;
+  }
+
+  private VectorizedRowBatch getVectorizedRowBatch(long[] inputs, int size, TestType testType) {
+    switch (testType) {
+      case LONG2:
+        return getVectorizedRowBatchLong2(inputs, size);
+      case STRING_LONG:
+        return getVectorizedRowBatchStringLong(inputs, size);
+      default:
+        throw new IllegalArgumentException();
+    }
+  }
+
+  private byte[] encodeTime(long time) {
+    ByteBuffer encoded;
+    try {
+      String formatted = dateFormat.format(new Date(time / (1000 * 1000)));
+      encoded = Text.encode(formatted);
+    } catch (CharacterCodingException e) {
+      throw new RuntimeException(e);
+    }
+    return Arrays.copyOf(encoded.array(), encoded.limit());
+  }
+
+  private long decodeTime(byte[] time) {
+    try {
+      return dateFormat.parse(Text.decode(time)).getTime() * 1000 * 1000;
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private long readVectorElementAt(ColumnVector col, int i) {
+    if (col instanceof LongColumnVector) {
+      return ((LongColumnVector) col).vector[i];
+    }
+    if (col instanceof BytesColumnVector) {
+      byte[] timeBytes = ((BytesColumnVector)
col).vector[i]; + return decodeTime(timeBytes); + } + throw new IllegalArgumentException(); + } + + private enum TestType { + LONG2, STRING_LONG + } + private void compareToUDFYearLong(long t, int y) { UDFYear udf = new UDFYear(); TimestampWritable tsw = toTimestampWritable(t); @@ -120,20 +233,23 @@ private void compareToUDFYearLong(long t, int y) { Assert.assertEquals(res.get(), y); } - private void verifyUDFYearLong(VectorizedRowBatch batch) { - /* col[1] = UDFYear(col[0]) */ - VectorUDFYearLong udf = new VectorUDFYearLong(0, 1); + private void verifyUDFYear(VectorizedRowBatch batch, TestType testType) { + VectorExpression udf = null; + if (testType == TestType.LONG2) { + udf = new VectorUDFYearLong(0, 1); + } else { + udf = new VectorUDFYearString(0, 1); + } udf.evaluate(batch); final int in = 0; final int out = 1; - Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = ((LongColumnVector) batch.cols[in]).vector[i]; + long t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFYearLong(t, (int) y); } else { @@ -142,40 +258,54 @@ private void verifyUDFYearLong(VectorizedRowBatch batch) { } } - @Test - public void testVectorUDFYearLong() { - VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0}, - VectorizedRowBatch.DEFAULT_SIZE); + private void testVectorUDFYear(TestType testType) { + VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); - verifyUDFYearLong(batch); + verifyUDFYear(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFYearLong(batch); + verifyUDFYear(batch, testType); long[] boundaries = getAllBoundaries(); - batch = getVectorizedRowBatchLong2(boundaries, boundaries.length); - verifyUDFYearLong(batch); + batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); + verifyUDFYear(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFYearLong(batch); + verifyUDFYear(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); - verifyUDFYearLong(batch); + verifyUDFYear(batch, testType); - batch = getVectorizedRowBatchLong2(new long[] {0}, 1); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); batch.cols[0].isRepeating = true; - verifyUDFYearLong(batch); + verifyUDFYear(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; - verifyUDFYearLong(batch); + verifyUDFYear(batch, testType); - batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE); - verifyUDFYearLong(batch); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); + verifyUDFYear(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFYearLong(batch); + verifyUDFYear(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); - verifyUDFYearLong(batch); + verifyUDFYear(batch, testType); + } + + @Test + public void testVectorUDFYearLong() { + testVectorUDFYear(TestType.LONG2); } - /*end-macro*/ + @Test + public void testVectorUDFYearString() { + testVectorUDFYear(TestType.STRING_LONG); + + VectorizedRowBatch batch = 
getVectorizedRowBatchStringLong(new byte[] {'2', '2', '0', '1', '3'}, 1, 3); + VectorExpression udf = new VectorUDFYearString(0, 1); + udf.evaluate(batch); + LongColumnVector lcv = (LongColumnVector) batch.cols[1]; + Assert.assertEquals(false, batch.cols[0].isNull[0]); + Assert.assertEquals(true, lcv.isNull[0]); + } private void compareToUDFDayOfMonthLong(long t, int y) { UDFDayOfMonth udf = new UDFDayOfMonth(); @@ -184,20 +314,23 @@ private void compareToUDFDayOfMonthLong(long t, int y) { Assert.assertEquals(res.get(), y); } - private void verifyUDFDayOfMonthLong(VectorizedRowBatch batch) { - /* col[1] = UDFDayOfMonth(col[0]) */ - VectorUDFDayOfMonthLong udf = new VectorUDFDayOfMonthLong(0, 1); + private void verifyUDFDayOfMonth(VectorizedRowBatch batch, TestType testType) { + VectorExpression udf = null; + if (testType == TestType.LONG2) { + udf = new VectorUDFDayOfMonthLong(0, 1); + } else { + udf = new VectorUDFDayOfMonthString(0, 1); + } udf.evaluate(batch); final int in = 0; final int out = 1; - Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = ((LongColumnVector) batch.cols[in]).vector[i]; + long t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFDayOfMonthLong(t, (int) y); } else { @@ -206,37 +339,46 @@ private void verifyUDFDayOfMonthLong(VectorizedRowBatch batch) { } } - @Test - public void testVectorUDFDayOfMonthLong() { - VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0}, - VectorizedRowBatch.DEFAULT_SIZE); + private void testVectorUDFDayOfMonth(TestType testType) { + VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); - verifyUDFDayOfMonthLong(batch); + verifyUDFDayOfMonth(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFDayOfMonthLong(batch); + verifyUDFDayOfMonth(batch, testType); long[] boundaries = getAllBoundaries(); - batch = getVectorizedRowBatchLong2(boundaries, boundaries.length); - verifyUDFDayOfMonthLong(batch); + batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); + verifyUDFDayOfMonth(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFDayOfMonthLong(batch); + verifyUDFDayOfMonth(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); - verifyUDFDayOfMonthLong(batch); + verifyUDFDayOfMonth(batch, testType); - batch = getVectorizedRowBatchLong2(new long[] {0}, 1); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); batch.cols[0].isRepeating = true; - verifyUDFDayOfMonthLong(batch); + verifyUDFDayOfMonth(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; - verifyUDFDayOfMonthLong(batch); + verifyUDFDayOfMonth(batch, testType); - batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE); - verifyUDFDayOfMonthLong(batch); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); + verifyUDFDayOfMonth(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFDayOfMonthLong(batch); + verifyUDFDayOfMonth(batch, testType); 
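+    // Null out random entries of the output column too; evaluate() must reset them.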
TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); - verifyUDFDayOfMonthLong(batch); + verifyUDFDayOfMonth(batch, testType); + } + + @Test + public void testVectorUDFDayOfMonthLong() { + testVectorUDFDayOfMonth(TestType.LONG2); + } + + @Test + public void testVectorUDFDayOfMonthString() { + testVectorUDFDayOfMonth(TestType.STRING_LONG); } private void compareToUDFHourLong(long t, int y) { @@ -246,20 +388,23 @@ private void compareToUDFHourLong(long t, int y) { Assert.assertEquals(res.get(), y); } - private void verifyUDFHourLong(VectorizedRowBatch batch) { - /* col[1] = UDFHour(col[0]) */ - VectorUDFHourLong udf = new VectorUDFHourLong(0, 1); + private void verifyUDFHour(VectorizedRowBatch batch, TestType testType) { + VectorExpression udf = null; + if (testType == TestType.LONG2) { + udf = new VectorUDFHourLong(0, 1); + } else { + udf = new VectorUDFHourString(0, 1); + } udf.evaluate(batch); final int in = 0; final int out = 1; - Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = ((LongColumnVector) batch.cols[in]).vector[i]; + long t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFHourLong(t, (int) y); } else { @@ -268,37 +413,46 @@ private void verifyUDFHourLong(VectorizedRowBatch batch) { } } - @Test - public void testVectorUDFHourLong() { - VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0}, - VectorizedRowBatch.DEFAULT_SIZE); + private void testVectorUDFHour(TestType testType) { + VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); - verifyUDFHourLong(batch); + verifyUDFHour(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFHourLong(batch); + verifyUDFHour(batch, testType); long[] boundaries = getAllBoundaries(); - batch = getVectorizedRowBatchLong2(boundaries, boundaries.length); - verifyUDFHourLong(batch); + batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); + verifyUDFHour(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFHourLong(batch); + verifyUDFHour(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); - verifyUDFHourLong(batch); + verifyUDFHour(batch, testType); - batch = getVectorizedRowBatchLong2(new long[] {0}, 1); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); batch.cols[0].isRepeating = true; - verifyUDFHourLong(batch); + verifyUDFHour(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; - verifyUDFHourLong(batch); + verifyUDFHour(batch, testType); - batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE); - verifyUDFHourLong(batch); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); + verifyUDFHour(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFHourLong(batch); + verifyUDFHour(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); - verifyUDFHourLong(batch); + verifyUDFHour(batch, testType); + } + + @Test + public void testVectorUDFHourLong() { + testVectorUDFHour(TestType.LONG2); + } + + @Test + public void 
testVectorUDFHourString() { + testVectorUDFHour(TestType.STRING_LONG); } private void compareToUDFMinuteLong(long t, int y) { @@ -308,20 +462,23 @@ private void compareToUDFMinuteLong(long t, int y) { Assert.assertEquals(res.get(), y); } - private void verifyUDFMinuteLong(VectorizedRowBatch batch) { - /* col[1] = UDFMinute(col[0]) */ - VectorUDFMinuteLong udf = new VectorUDFMinuteLong(0, 1); + private void verifyUDFMinute(VectorizedRowBatch batch, TestType testType) { + VectorExpression udf = null; + if (testType == TestType.LONG2) { + udf = new VectorUDFMinuteLong(0, 1); + } else { + udf = new VectorUDFMinuteString(0, 1); + } udf.evaluate(batch); final int in = 0; final int out = 1; - Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls); for (int i = 0; i < batch.size; i++) { if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = ((LongColumnVector) batch.cols[in]).vector[i]; + long t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFMinuteLong(t, (int) y); } else { @@ -330,37 +487,46 @@ private void verifyUDFMinuteLong(VectorizedRowBatch batch) { } } - @Test - public void testVectorUDFMinuteLong() { - VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0}, - VectorizedRowBatch.DEFAULT_SIZE); + private void testVectorUDFMinute(TestType testType) { + VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); - verifyUDFMinuteLong(batch); + verifyUDFMinute(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFMinuteLong(batch); + verifyUDFMinute(batch, testType); long[] boundaries = getAllBoundaries(); - batch = getVectorizedRowBatchLong2(boundaries, boundaries.length); - verifyUDFMinuteLong(batch); + batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); + verifyUDFMinute(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFMinuteLong(batch); + verifyUDFMinute(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); - verifyUDFMinuteLong(batch); + verifyUDFMinute(batch, testType); - batch = getVectorizedRowBatchLong2(new long[] {0}, 1); + batch = getVectorizedRowBatch(new long[] {0}, 1, testType); batch.cols[0].isRepeating = true; - verifyUDFMinuteLong(batch); + verifyUDFMinute(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; - verifyUDFMinuteLong(batch); + verifyUDFMinute(batch, testType); - batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE); - verifyUDFMinuteLong(batch); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType); + verifyUDFMinute(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); - verifyUDFMinuteLong(batch); + verifyUDFMinute(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); - verifyUDFMinuteLong(batch); + verifyUDFMinute(batch, testType); + } + + @Test + public void testVectorUDFMinuteLong() { + testVectorUDFMinute(TestType.LONG2); + } + + @Test + public void testVectorUDFMinuteString() { + testVectorUDFMinute(TestType.STRING_LONG); } private void compareToUDFMonthLong(long t, int y) { @@ -370,20 +536,23 @@ private void compareToUDFMonthLong(long t, int y) { 
     Assert.assertEquals(res.get(), y);
   }
 
-  private void verifyUDFMonthLong(VectorizedRowBatch batch) {
-    /* col[1] = UDFMonth(col[0]) */
-    VectorUDFMonthLong udf = new VectorUDFMonthLong(0, 1);
+  private void verifyUDFMonth(VectorizedRowBatch batch, TestType testType) {
+    VectorExpression udf;
+    if (testType == TestType.LONG2) {
+      udf = new VectorUDFMonthLong(0, 1);
+    } else {
+      udf = new VectorUDFMonthString(0, 1);
+    }
     udf.evaluate(batch);
     final int in = 0;
     final int out = 1;
-    Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);
     for (int i = 0; i < batch.size; i++) {
       if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
         if (!batch.cols[in].noNulls) {
           Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
         }
-        long t = ((LongColumnVector) batch.cols[in]).vector[i];
+        long t = readVectorElementAt(batch.cols[in], i);
         long y = ((LongColumnVector) batch.cols[out]).vector[i];
         compareToUDFMonthLong(t, (int) y);
       } else {
@@ -392,37 +561,46 @@ private void verifyUDFMonthLong(VectorizedRowBatch batch) {
     }
   }
 
-  @Test
-  public void testVectorUDFMonthLong() {
-    VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
-        VectorizedRowBatch.DEFAULT_SIZE);
+  private void testVectorUDFMonth(TestType testType) {
+    VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+        VectorizedRowBatch.DEFAULT_SIZE, testType);
     Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
     Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
-    verifyUDFMonthLong(batch);
+    verifyUDFMonth(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFMonthLong(batch);
+    verifyUDFMonth(batch, testType);
 
     long[] boundaries = getAllBoundaries();
-    batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
-    verifyUDFMonthLong(batch);
+    batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+    verifyUDFMonth(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFMonthLong(batch);
+    verifyUDFMonth(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
-    verifyUDFMonthLong(batch);
+    verifyUDFMonth(batch, testType);
 
-    batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+    batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
     batch.cols[0].isRepeating = true;
-    verifyUDFMonthLong(batch);
+    verifyUDFMonth(batch, testType);
     batch.cols[0].noNulls = false;
     batch.cols[0].isNull[0] = true;
-    verifyUDFMonthLong(batch);
+    verifyUDFMonth(batch, testType);
 
-    batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
-    verifyUDFMonthLong(batch);
+    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+    verifyUDFMonth(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFMonthLong(batch);
+    verifyUDFMonth(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
-    verifyUDFMonthLong(batch);
+    verifyUDFMonth(batch, testType);
+  }
+
+  @Test
+  public void testVectorUDFMonthLong() {
+    testVectorUDFMonth(TestType.LONG2);
+  }
+
+  @Test
+  public void testVectorUDFMonthString() {
+    testVectorUDFMonth(TestType.STRING_LONG);
   }
 
   private void compareToUDFSecondLong(long t, int y) {
@@ -432,20 +610,23 @@ private void compareToUDFSecondLong(long t, int y) {
     Assert.assertEquals(res.get(), y);
   }
 
-  private void verifyUDFSecondLong(VectorizedRowBatch batch) {
-    /* col[1] = UDFSecond(col[0]) */
-    VectorUDFSecondLong udf = new VectorUDFSecondLong(0, 1);
+  private void verifyUDFSecond(VectorizedRowBatch batch, TestType testType) {
+    VectorExpression udf;
+    if (testType == TestType.LONG2) {
+      udf = new VectorUDFSecondLong(0, 1);
+    } else {
+      udf = new VectorUDFSecondString(0, 1);
+    }
     udf.evaluate(batch);
     final int in = 0;
     final int out = 1;
-    Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);
     for (int i = 0; i < batch.size; i++) {
       if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
         if (!batch.cols[in].noNulls) {
           Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
         }
-        long t = ((LongColumnVector) batch.cols[in]).vector[i];
+        long t = readVectorElementAt(batch.cols[in], i);
         long y = ((LongColumnVector) batch.cols[out]).vector[i];
         compareToUDFSecondLong(t, (int) y);
       } else {
@@ -454,37 +635,46 @@ private void verifyUDFSecondLong(VectorizedRowBatch batch) {
     }
   }
 
-  @Test
-  public void testVectorUDFSecondLong() {
-    VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
-        VectorizedRowBatch.DEFAULT_SIZE);
+  private void testVectorUDFSecond(TestType testType) {
+    VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+        VectorizedRowBatch.DEFAULT_SIZE, testType);
     Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
     Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
-    verifyUDFSecondLong(batch);
+    verifyUDFSecond(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFSecondLong(batch);
+    verifyUDFSecond(batch, testType);
 
     long[] boundaries = getAllBoundaries();
-    batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
-    verifyUDFSecondLong(batch);
+    batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+    verifyUDFSecond(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFSecondLong(batch);
+    verifyUDFSecond(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
-    verifyUDFSecondLong(batch);
+    verifyUDFSecond(batch, testType);
 
-    batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+    batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
     batch.cols[0].isRepeating = true;
-    verifyUDFSecondLong(batch);
+    verifyUDFSecond(batch, testType);
     batch.cols[0].noNulls = false;
     batch.cols[0].isNull[0] = true;
-    verifyUDFSecondLong(batch);
+    verifyUDFSecond(batch, testType);
 
-    batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
-    verifyUDFSecondLong(batch);
+    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+    verifyUDFSecond(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFSecondLong(batch);
+    verifyUDFSecond(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
-    verifyUDFSecondLong(batch);
+    verifyUDFSecond(batch, testType);
+  }
+
+  @Test
+  public void testVectorUDFSecondLong() {
+    testVectorUDFSecond(TestType.LONG2);
+  }
+
+  @Test
+  public void testVectorUDFSecondString() {
+    testVectorUDFSecond(TestType.STRING_LONG);
   }
 
   private LongWritable getLongWritable(TimestampWritable i) {
@@ -508,20 +698,23 @@ private void compareToUDFUnixTimeStampLong(long t, long y) {
     Assert.assertEquals(res.get(), y);
   }
 
-  private void verifyUDFUnixTimeStampLong(VectorizedRowBatch batch) {
-    /* col[1] = UDFUnixTimeStamp(col[0]) */
-    VectorUDFUnixTimeStampLong udf = new VectorUDFUnixTimeStampLong(0, 1);
+  private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch, TestType testType) {
+    VectorExpression udf;
+    if (testType == TestType.LONG2) {
+      udf = new VectorUDFUnixTimeStampLong(0, 1);
+    } else {
+      udf = new VectorUDFUnixTimeStampString(0, 1);
+    }
     udf.evaluate(batch);
     final int in = 0;
     final int out = 1;
-    Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);
     for (int i = 0; i < batch.size; i++) {
       if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
-        if (!batch.cols[in].noNulls) {
+        if (!batch.cols[out].noNulls) {
           Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
         }
-        long t = ((LongColumnVector) batch.cols[in]).vector[i];
+        long t = readVectorElementAt(batch.cols[in], i);
         long y = ((LongColumnVector) batch.cols[out]).vector[i];
         compareToUDFUnixTimeStampLong(t, y);
       } else {
@@ -530,37 +723,46 @@ private void verifyUDFUnixTimeStampLong(VectorizedRowBatch batch) {
     }
   }
 
-  @Test
-  public void testVectorUDFUnixTimeStampLong() {
-    VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
-        VectorizedRowBatch.DEFAULT_SIZE);
+  private void testVectorUDFUnixTimeStamp(TestType testType) {
+    VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+        VectorizedRowBatch.DEFAULT_SIZE, testType);
     Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
     Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
-    verifyUDFUnixTimeStampLong(batch);
+    verifyUDFUnixTimeStamp(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFUnixTimeStampLong(batch);
+    verifyUDFUnixTimeStamp(batch, testType);
 
     long[] boundaries = getAllBoundaries();
-    batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
-    verifyUDFUnixTimeStampLong(batch);
+    batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+    verifyUDFUnixTimeStamp(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFUnixTimeStampLong(batch);
+    verifyUDFUnixTimeStamp(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
-    verifyUDFUnixTimeStampLong(batch);
+    verifyUDFUnixTimeStamp(batch, testType);
 
-    batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+    batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
     batch.cols[0].isRepeating = true;
-    verifyUDFUnixTimeStampLong(batch);
+    verifyUDFUnixTimeStamp(batch, testType);
     batch.cols[0].noNulls = false;
     batch.cols[0].isNull[0] = true;
-    verifyUDFUnixTimeStampLong(batch);
+    verifyUDFUnixTimeStamp(batch, testType);
 
-    batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
-    verifyUDFUnixTimeStampLong(batch);
+    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+    verifyUDFUnixTimeStamp(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFUnixTimeStampLong(batch);
+    verifyUDFUnixTimeStamp(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
-    verifyUDFUnixTimeStampLong(batch);
+    verifyUDFUnixTimeStamp(batch, testType);
+  }
+
+  @Test
+  public void testVectorUDFUnixTimeStampLong() {
+    testVectorUDFUnixTimeStamp(TestType.LONG2);
+  }
+
+  @Test
+  public void testVectorUDFUnixTimeStampString() {
+    testVectorUDFUnixTimeStamp(TestType.STRING_LONG);
   }
 
   private void compareToUDFWeekOfYearLong(long t, int y) {
@@ -570,20 +772,20 @@ private void compareToUDFWeekOfYearLong(long t, int y) {
     Assert.assertEquals(res.get(), y);
   }
 
-  private void verifyUDFWeekOfYearLong(VectorizedRowBatch batch) {
-    /* col[1] = UDFWeekOfYear(col[0]) */
-    VectorUDFWeekOfYearLong udf = new VectorUDFWeekOfYearLong(0, 1);
+  private void verifyUDFWeekOfYear(VectorizedRowBatch batch, TestType testType) {
+    VectorExpression udf;
+    if (testType == TestType.LONG2) {
+      udf = new VectorUDFWeekOfYearLong(0, 1);
+    } else {
+      udf = new VectorUDFWeekOfYearString(0, 1);
+    }
     udf.evaluate(batch);
     final int in = 0;
     final int out = 1;
-    Assert.assertEquals(batch.cols[in].noNulls, batch.cols[out].noNulls);
     for (int i = 0; i < batch.size; i++) {
       if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
-        if (!batch.cols[in].noNulls) {
-          Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
-        }
-        long t = ((LongColumnVector) batch.cols[in]).vector[i];
+        long t = readVectorElementAt(batch.cols[in], i);
         long y = ((LongColumnVector) batch.cols[out]).vector[i];
         compareToUDFWeekOfYearLong(t, (int) y);
       } else {
@@ -592,37 +794,46 @@ private void verifyUDFWeekOfYearLong(VectorizedRowBatch batch) {
     }
   }
 
-  @Test
-  public void testVectorUDFWeekOfYearLong() {
-    VectorizedRowBatch batch = getVectorizedRowBatchLong2(new long[] {0},
-        VectorizedRowBatch.DEFAULT_SIZE);
+  private void testVectorUDFWeekOfYear(TestType testType) {
+    VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0},
+        VectorizedRowBatch.DEFAULT_SIZE, testType);
     Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
     Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
-    verifyUDFWeekOfYearLong(batch);
+    verifyUDFWeekOfYear(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFWeekOfYearLong(batch);
+    verifyUDFWeekOfYear(batch, testType);
 
     long[] boundaries = getAllBoundaries();
-    batch = getVectorizedRowBatchLong2(boundaries, boundaries.length);
-    verifyUDFWeekOfYearLong(batch);
+    batch = getVectorizedRowBatch(boundaries, boundaries.length, testType);
+    verifyUDFWeekOfYear(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFWeekOfYearLong(batch);
+    verifyUDFWeekOfYear(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
-    verifyUDFWeekOfYearLong(batch);
+    verifyUDFWeekOfYear(batch, testType);
 
-    batch = getVectorizedRowBatchLong2(new long[] {0}, 1);
+    batch = getVectorizedRowBatch(new long[] {0}, 1, testType);
     batch.cols[0].isRepeating = true;
-    verifyUDFWeekOfYearLong(batch);
+    verifyUDFWeekOfYear(batch, testType);
     batch.cols[0].noNulls = false;
     batch.cols[0].isNull[0] = true;
-    verifyUDFWeekOfYearLong(batch);
+    verifyUDFWeekOfYear(batch, testType);
 
-    batch = getVectorizedRandomRowBatchLong2(200, VectorizedRowBatch.DEFAULT_SIZE);
-    verifyUDFWeekOfYearLong(batch);
+    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, testType);
+    verifyUDFWeekOfYear(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
-    verifyUDFWeekOfYearLong(batch);
+    verifyUDFWeekOfYear(batch, testType);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
-    verifyUDFWeekOfYearLong(batch);
+    verifyUDFWeekOfYear(batch, testType);
+  }
+
+  @Test
+  public void testVectorUDFWeekOfYearLong() {
+    testVectorUDFWeekOfYear(TestType.LONG2);
+  }
+
+  @Test
+  public void testVectorUDFWeekOfYearString() {
+    testVectorUDFWeekOfYear(TestType.STRING_LONG);
   }
 
   public static void main(String[] args) {
@@ -633,6 +844,12 @@ public static void main(String[] args) {
     self.testVectorUDFHourLong();
     self.testVectorUDFWeekOfYearLong();
     self.testVectorUDFUnixTimeStampLong();
+    self.testVectorUDFYearString();
+    self.testVectorUDFMonthString();
+    self.testVectorUDFDayOfMonthString();
+    self.testVectorUDFHourString();
+    self.testVectorUDFWeekOfYearString();
+    self.testVectorUDFUnixTimeStampString();
   }
 }
diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
new file mode 100644
index 0000000..6975839
--- /dev/null
+++ ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
@@ -0,0 +1,116 @@
+SET hive.vectorized.execution.enabled = true;
+
+-- Test timestamp functions in vectorized mode to verify they run correctly end-to-end.
+
+CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC;
+
+INSERT OVERWRITE TABLE alltypesorc_string
+SELECT
+  to_utc_timestamp(ctimestamp1, 'America/Los_Angeles'),
+  CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING)
+FROM alltypesorc
+LIMIT 40;
+
+CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC;
+
+INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1;
+INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1;
+INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1;
+
+EXPLAIN SELECT
+  to_unix_timestamp(ctimestamp1),
+  year(ctimestamp1),
+  month(ctimestamp1),
+  day(ctimestamp1),
+  dayofmonth(ctimestamp1),
+  weekofyear(ctimestamp1),
+  hour(ctimestamp1),
+  minute(ctimestamp1),
+  second(ctimestamp1)
+FROM alltypesorc_string;
+
+SELECT
+  to_unix_timestamp(ctimestamp1),
+  year(ctimestamp1),
+  month(ctimestamp1),
+  day(ctimestamp1),
+  dayofmonth(ctimestamp1),
+  weekofyear(ctimestamp1),
+  hour(ctimestamp1),
+  minute(ctimestamp1),
+  second(ctimestamp1)
+FROM alltypesorc_string;
+
+EXPLAIN SELECT
+  to_unix_timestamp(stimestamp1),
+  year(stimestamp1),
+  month(stimestamp1),
+  day(stimestamp1),
+  dayofmonth(stimestamp1),
+  weekofyear(stimestamp1),
+  hour(stimestamp1),
+  minute(stimestamp1),
+  second(stimestamp1)
+FROM alltypesorc_string;
+
+SELECT
+  to_unix_timestamp(stimestamp1),
+  year(stimestamp1),
+  month(stimestamp1),
+  day(stimestamp1),
+  dayofmonth(stimestamp1),
+  weekofyear(stimestamp1),
+  hour(stimestamp1),
+  minute(stimestamp1),
+  second(stimestamp1)
+FROM alltypesorc_string;
+
+EXPLAIN SELECT
+  to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1),
+  year(ctimestamp1) = year(stimestamp1),
+  month(ctimestamp1) = month(stimestamp1),
+  day(ctimestamp1) = day(stimestamp1),
+  dayofmonth(ctimestamp1) = dayofmonth(stimestamp1),
+  weekofyear(ctimestamp1) = weekofyear(stimestamp1),
+  hour(ctimestamp1) = hour(stimestamp1),
+  minute(ctimestamp1) = minute(stimestamp1),
+  second(ctimestamp1) = second(stimestamp1)
+FROM alltypesorc_string;
+
+-- Should all be true or NULL
+SELECT
+  to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1),
+  year(ctimestamp1) = year(stimestamp1),
+  month(ctimestamp1) = month(stimestamp1),
+  day(ctimestamp1) = day(stimestamp1),
+  dayofmonth(ctimestamp1) = dayofmonth(stimestamp1),
+  weekofyear(ctimestamp1) = weekofyear(stimestamp1),
+  hour(ctimestamp1) = hour(stimestamp1),
+  minute(ctimestamp1) = minute(stimestamp1),
+  second(ctimestamp1) = second(stimestamp1)
+FROM alltypesorc_string;
+
+-- Wrong format. Should all be NULL.
+EXPLAIN SELECT
+  to_unix_timestamp(stimestamp1),
+  year(stimestamp1),
+  month(stimestamp1),
+  day(stimestamp1),
+  dayofmonth(stimestamp1),
+  weekofyear(stimestamp1),
+  hour(stimestamp1),
+  minute(stimestamp1),
+  second(stimestamp1)
+FROM alltypesorc_wrong;
+
+SELECT
+  to_unix_timestamp(stimestamp1),
+  year(stimestamp1),
+  month(stimestamp1),
+  day(stimestamp1),
+  dayofmonth(stimestamp1),
+  weekofyear(stimestamp1),
+  hour(stimestamp1),
+  minute(stimestamp1),
+  second(stimestamp1)
+FROM alltypesorc_wrong;
diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
new file mode 100644
index 0000000..29f0aef
--- /dev/null
+++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -0,0 +1,647 @@
+PREHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end.
+
+CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end.
+
+CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@alltypesorc_string
+PREHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string
+SELECT
+  to_utc_timestamp(ctimestamp1, 'America/Los_Angeles'),
+  CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING)
+FROM alltypesorc
+LIMIT 40
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypesorc_string
+POSTHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string
+SELECT
+  to_utc_timestamp(ctimestamp1, 'America/Los_Angeles'),
+  CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING)
+FROM alltypesorc
+LIMIT 40
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypesorc_string
+POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ]
+PREHOOK: query: CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@alltypesorc_wrong
+POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ]
+PREHOOK: query: INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypesorc_wrong
+POSTHOOK: query: INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypesorc_wrong
+POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION
[(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc_wrong +POSTHOOK: query: INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc_wrong +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc_wrong +POSTHOOK: query: INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc_wrong +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +PREHOOK: query: EXPLAIN SELECT + to_unix_timestamp(ctimestamp1), + year(ctimestamp1), + month(ctimestamp1), + day(ctimestamp1), + dayofmonth(ctimestamp1), + weekofyear(ctimestamp1), + hour(ctimestamp1), + minute(ctimestamp1), + second(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + to_unix_timestamp(ctimestamp1), + year(ctimestamp1), + month(ctimestamp1), + day(ctimestamp1), + dayofmonth(ctimestamp1), + weekofyear(ctimestamp1), + hour(ctimestamp1), + minute(ctimestamp1), + second(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_string))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION day (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL 
ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION hour (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION minute (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION second (TOK_TABLE_OR_COL ctimestamp1)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypesorc_string + TableScan + alias: alltypesorc_string + Select Operator + expressions: + expr: to_unix_timestamp(ctimestamp1) + type: bigint + expr: year(ctimestamp1) + type: int + expr: month(ctimestamp1) + type: int + expr: day(ctimestamp1) + type: int + expr: dayofmonth(ctimestamp1) + type: int + expr: weekofyear(ctimestamp1) + type: int + expr: hour(ctimestamp1) + type: int + expr: minute(ctimestamp1) + type: int + expr: second(ctimestamp1) + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT + to_unix_timestamp(ctimestamp1), + year(ctimestamp1), + month(ctimestamp1), + day(ctimestamp1), + dayofmonth(ctimestamp1), + weekofyear(ctimestamp1), + hour(ctimestamp1), + minute(ctimestamp1), + second(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + to_unix_timestamp(ctimestamp1), + year(ctimestamp1), + month(ctimestamp1), + day(ctimestamp1), + dayofmonth(ctimestamp1), + weekofyear(ctimestamp1), + hour(ctimestamp1), + minute(ctimestamp1), + second(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +28786 1969 12 31 31 1 23 59 46 +NULL NULL NULL NULL NULL NULL NULL NULL NULL +28795 1969 12 31 31 1 23 59 55 +28784 1969 12 31 31 1 23 59 44 +28790 1969 12 31 31 1 23 59 50 +28815 1970 1 1 1 1 0 0 15 +28807 1970 1 1 1 1 0 0 7 +28804 1970 1 1 1 1 0 0 4 +28792 1969 12 31 31 1 23 59 52 +28784 1969 12 31 31 1 23 59 44 +28784 1969 12 31 31 1 23 59 44 +28805 1970 1 1 1 1 0 0 5 +28814 1970 1 1 1 1 0 0 14 +28813 1970 1 1 1 1 0 0 13 +28787 1969 12 31 31 1 23 59 47 +28809 1970 1 1 1 1 0 0 9 +28792 1969 12 31 31 1 23 59 52 +28800 1970 1 1 1 1 0 0 0 +28805 1970 1 1 1 1 0 0 5 +28798 1969 12 31 31 1 23 59 58 +28789 1969 12 31 31 1 23 59 49 +28811 1970 1 1 1 1 0 0 11 +28808 1970 1 1 1 1 0 0 8 +28802 1970 1 1 1 1 0 0 2 +28804 1970 1 1 1 1 0 0 4 +28784 1969 12 31 31 1 23 59 44 +28789 1969 12 31 31 1 23 59 49 +28792 1969 12 31 31 1 23 59 52 +28807 1970 1 1 1 1 0 0 7 +28795 1969 12 31 31 1 23 59 55 +28800 1970 1 1 1 1 0 0 0 +28798 1969 12 31 31 
1 23 59 58 +28807 1970 1 1 1 1 0 0 7 +28795 1969 12 31 31 1 23 59 55 +28785 1969 12 31 31 1 23 59 45 +28788 1969 12 31 31 1 23 59 48 +28803 1970 1 1 1 1 0 0 3 +28808 1970 1 1 1 1 0 0 8 +28792 1969 12 31 31 1 23 59 52 +28806 1970 1 1 1 1 0 0 6 +PREHOOK: query: EXPLAIN SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_string))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION day (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION hour (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION minute (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION second (TOK_TABLE_OR_COL stimestamp1)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypesorc_string + TableScan + alias: alltypesorc_string + Select Operator + expressions: + expr: to_unix_timestamp(stimestamp1) + type: bigint + expr: year(stimestamp1) + type: int + expr: month(stimestamp1) + type: int + expr: day(stimestamp1) + type: int + expr: dayofmonth(stimestamp1) + type: int + expr: weekofyear(stimestamp1) + type: int + expr: hour(stimestamp1) + type: int + expr: minute(stimestamp1) + type: int + expr: second(stimestamp1) + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was 
here #### +POSTHOOK: query: SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +28786 1969 12 31 31 1 23 59 46 +NULL NULL NULL NULL NULL NULL NULL NULL NULL +28795 1969 12 31 31 1 23 59 55 +28784 1969 12 31 31 1 23 59 44 +28790 1969 12 31 31 1 23 59 50 +28815 1970 1 1 1 1 0 0 15 +28807 1970 1 1 1 1 0 0 7 +28804 1970 1 1 1 1 0 0 4 +28792 1969 12 31 31 1 23 59 52 +28784 1969 12 31 31 1 23 59 44 +28784 1969 12 31 31 1 23 59 44 +28805 1970 1 1 1 1 0 0 5 +28814 1970 1 1 1 1 0 0 14 +28813 1970 1 1 1 1 0 0 13 +28787 1969 12 31 31 1 23 59 47 +28809 1970 1 1 1 1 0 0 9 +28792 1969 12 31 31 1 23 59 52 +28800 1970 1 1 1 1 0 0 0 +28805 1970 1 1 1 1 0 0 5 +28798 1969 12 31 31 1 23 59 58 +28789 1969 12 31 31 1 23 59 49 +28811 1970 1 1 1 1 0 0 11 +28808 1970 1 1 1 1 0 0 8 +28802 1970 1 1 1 1 0 0 2 +28804 1970 1 1 1 1 0 0 4 +28784 1969 12 31 31 1 23 59 44 +28789 1969 12 31 31 1 23 59 49 +28792 1969 12 31 31 1 23 59 52 +28807 1970 1 1 1 1 0 0 7 +28795 1969 12 31 31 1 23 59 55 +28800 1970 1 1 1 1 0 0 0 +28798 1969 12 31 31 1 23 59 58 +28807 1970 1 1 1 1 0 0 7 +28795 1969 12 31 31 1 23 59 55 +28785 1969 12 31 31 1 23 59 45 +28788 1969 12 31 31 1 23 59 48 +28803 1970 1 1 1 1 0 0 3 +28808 1970 1 1 1 1 0 0 8 +28792 1969 12 31 31 1 23 59 52 +28806 1970 1 1 1 1 0 0 6 +PREHOOK: query: EXPLAIN SELECT + to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1), + year(ctimestamp1) = year(stimestamp1), + month(ctimestamp1) = month(stimestamp1), + day(ctimestamp1) = day(stimestamp1), + dayofmonth(ctimestamp1) = dayofmonth(stimestamp1), + weekofyear(ctimestamp1) = weekofyear(stimestamp1), + hour(ctimestamp1) = hour(stimestamp1), + minute(ctimestamp1) = minute(stimestamp1), + second(ctimestamp1) = second(stimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1), + year(ctimestamp1) = year(stimestamp1), + month(ctimestamp1) = month(stimestamp1), + day(ctimestamp1) = day(stimestamp1), + dayofmonth(ctimestamp1) = dayofmonth(stimestamp1), + weekofyear(ctimestamp1) = weekofyear(stimestamp1), + hour(ctimestamp1) = hour(stimestamp1), + minute(ctimestamp1) = minute(stimestamp1), + second(ctimestamp1) = second(stimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: 
alltypesorc_wrong.stimestamp1 SIMPLE [] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_string))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (= (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION year (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION year (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION month (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION month (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION day (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION day (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION hour (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION hour (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION minute (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION minute (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION second (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION second (TOK_TABLE_OR_COL stimestamp1))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypesorc_string + TableScan + alias: alltypesorc_string + Select Operator + expressions: + expr: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) + type: boolean + expr: (year(ctimestamp1) = year(stimestamp1)) + type: boolean + expr: (month(ctimestamp1) = month(stimestamp1)) + type: boolean + expr: (day(ctimestamp1) = day(stimestamp1)) + type: boolean + expr: (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) + type: boolean + expr: (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) + type: boolean + expr: (hour(ctimestamp1) = hour(stimestamp1)) + type: boolean + expr: (minute(ctimestamp1) = minute(stimestamp1)) + type: boolean + expr: (second(ctimestamp1) = second(stimestamp1)) + type: boolean + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- Should all be true or NULL +SELECT + to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1), + year(ctimestamp1) = year(stimestamp1), + month(ctimestamp1) = month(stimestamp1), + day(ctimestamp1) = day(stimestamp1), + dayofmonth(ctimestamp1) = dayofmonth(stimestamp1), + weekofyear(ctimestamp1) = weekofyear(stimestamp1), + hour(ctimestamp1) = hour(stimestamp1), + minute(ctimestamp1) = minute(stimestamp1), + second(ctimestamp1) = second(stimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: -- Should all be true or NULL +SELECT + to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1), + year(ctimestamp1) = year(stimestamp1), + month(ctimestamp1) = month(stimestamp1), + day(ctimestamp1) = day(stimestamp1), + 
dayofmonth(ctimestamp1) = dayofmonth(stimestamp1), + weekofyear(ctimestamp1) = weekofyear(stimestamp1), + hour(ctimestamp1) = hour(stimestamp1), + minute(ctimestamp1) = minute(stimestamp1), + second(ctimestamp1) = second(stimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +true true true true true true true true true +NULL NULL NULL NULL NULL NULL NULL NULL NULL +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +PREHOOK: query: -- Wrong format. Should all be NULL. +EXPLAIN SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_wrong +PREHOOK: type: QUERY +POSTHOOK: query: -- Wrong format. Should all be NULL. 
+EXPLAIN SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_wrong +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_wrong))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION day (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION hour (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION minute (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION second (TOK_TABLE_OR_COL stimestamp1)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypesorc_wrong + TableScan + alias: alltypesorc_wrong + Select Operator + expressions: + expr: to_unix_timestamp(stimestamp1) + type: bigint + expr: year(stimestamp1) + type: int + expr: month(stimestamp1) + type: int + expr: day(stimestamp1) + type: int + expr: dayofmonth(stimestamp1) + type: int + expr: weekofyear(stimestamp1) + type: int + expr: hour(stimestamp1) + type: int + expr: minute(stimestamp1) + type: int + expr: second(stimestamp1) + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_wrong +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_wrong +#### A masked pattern was here #### +POSTHOOK: query: SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_wrong +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_wrong +#### A masked pattern was here #### +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: 
Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE [] +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL
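
Note on the refactored tests earlier in this patch: each verify method reads its input column through a readVectorElementAt(col, i) helper, so the same assertions cover both TestType.LONG2 batches (timestamps as epoch nanoseconds in a LongColumnVector) and TestType.STRING_LONG batches (the same instants rendered as timestamp text in a BytesColumnVector). The helper's definition falls outside this excerpt; the sketch below is a hypothetical reconstruction of what it has to do, not the patch's exact code. The default-charset decoding and the nanosecond conversion are assumptions.

  // Hypothetical sketch of the readVectorElementAt helper used by the verify
  // methods; assumes org.apache.hadoop.hive.ql.exec.vector.{ColumnVector,
  // LongColumnVector, BytesColumnVector} and java.sql.Timestamp are imported.
  private long readVectorElementAt(ColumnVector col, int i) {
    if (col instanceof LongColumnVector) {
      // Vectorized timestamps travel as nanoseconds since the epoch.
      return ((LongColumnVector) col).vector[i];
    }
    // Otherwise assume string input: parse the bytes back to a Timestamp.
    BytesColumnVector bcv = (BytesColumnVector) col;
    String text = new String(bcv.vector[i], bcv.start[i], bcv.length[i]);
    Timestamp ts = Timestamp.valueOf(text);
    // Rebuild the epoch-nanosecond value; getTime() already carries the
    // sub-second millis, so strip them before scaling and re-add the nanos.
    return (ts.getTime() - ts.getNanos() / 1000000) * 1000000L + ts.getNanos();
  }

Either representation then funnels into the same compareToUDF*Long check against the row-mode UDF, which is what lets the new *String test cases reuse the long-based verification wholesale.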