diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java new file mode 100644 index 0000000..8b996af --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthString.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.text.ParseException; + +/** + * Expression to get day of month. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFDayOfMonthString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFDayOfMonthString(int colNum, int outputColumn) { + super(colNum, outputColumn); + } + + public VectorUDFDayOfMonthString() { + super(); + } + + @Override + protected long doGetField(byte[] bytes, int start, int length) throws ParseException { + int day = 0; + for (int i = 8; i < 10; i++) { + byte ch = bytes[start + i]; + day = 10 * day + (ch - '0'); + } + return day; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java new file mode 100644 index 0000000..1dad9ca --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourString.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.text.ParseException; + +/** + * Returns hour of day. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFHourString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFHourString(int colNum, int outputColumn) { + super(colNum, outputColumn); + } + + public VectorUDFHourString() { + super(); + } + + @Override + protected long doGetField(byte[] bytes, int start, int length) throws ParseException { + int hour = 0; + for (int i = 11; i < 13; i++) { + byte ch = bytes[start + i]; + hour = 10 * hour + (ch - '0'); + } + return hour; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java new file mode 100644 index 0000000..1516eff --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteString.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.text.ParseException; + +/** + * Returns minute value. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFMinuteString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFMinuteString(int colNum, int outputColumn) { + super(colNum, outputColumn); + } + + public VectorUDFMinuteString() { + super(); + } + + @Override + protected long doGetField(byte[] bytes, int start, int length) throws ParseException { + int minute = 0; + for (int i = 14; i < 16; i++) { + byte ch = bytes[start + i]; + minute = 10 * minute + (ch - '0'); + } + return minute; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java new file mode 100644 index 0000000..8963e2b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthString.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.text.ParseException; + +/** + * Returns month value. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFMonthString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFMonthString(int colNum, int outputColumn) { + super(colNum, outputColumn); + } + + public VectorUDFMonthString() { + super(); + } + + @Override + protected long doGetField(byte[] bytes, int start, int length) throws ParseException { + int month = 0; + for (int i = 5; i < 7; i++) { + byte ch = bytes[start + i]; + month = 10 * month + (ch - '0'); + } + return month; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java new file mode 100644 index 0000000..2d0104c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondString.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.text.ParseException; + +/** + * Expression to get seconds. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFSecondString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFSecondString(int colNum, int outputColumn) { + super(colNum, outputColumn); + } + + public VectorUDFSecondString() { + super(); + } + + @Override + protected long doGetField(byte[] bytes, int start, int length) throws ParseException { + int second = 0; + for (int i = 17; i < 19; i++) { + byte ch = bytes[start + i]; + second = 10 * second + (ch - '0'); + } + return second; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java new file mode 100644 index 0000000..c0dac74 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -0,0 +1,168 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +import java.nio.charset.CharacterCodingException; +import java.text.ParseException; + +/** + * Abstract class to return various fields from a String. + */ +public abstract class VectorUDFTimestampFieldString extends VectorExpression { + + private static final long serialVersionUID = 1L; + + protected int colNum; + protected int outputColumn; + private static final String patternMin = "0000-00-00 00:00:00.000000000"; + private static final String patternMax = "9999-19-99 29:59:59.999999999"; + + public VectorUDFTimestampFieldString(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public VectorUDFTimestampFieldString() { + } + + private long getField(byte[] bytes, int start, int length) throws CharacterCodingException, ParseException { + // Validate + for (int i = 0; i < length; i++) { + char ch = (char) bytes[start + i]; + if (ch < patternMin.charAt(i) || ch > patternMax.charAt(i)) { + throw new ParseException("A timestamp string must match 'yyyy-MM-dd HH:mm:ss.fffffffff' pattern.", i); + } + } + + return doGetField(bytes, start, length); + } + + protected abstract long doGetField(byte[] bytes, int start, int length) throws ParseException; + + @Override + public void evaluate(VectorizedRowBatch batch) { + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + BytesColumnVector inputCol = (BytesColumnVector)batch.cols[this.colNum]; + /* every line below this is identical for evaluateLong & evaluateString */ + final int n = inputCol.isRepeating ? 1 : batch.size; + int[] sel = batch.selected; + + if (batch.size == 0) { + /* n != batch.size when isRepeating */ + return; + } + + /* true for all algebraic UDFs with no state */ + outV.isRepeating = inputCol.isRepeating; + + if (inputCol.noNulls) { + outV.noNulls = true; + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + try { + outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]); + outV.isNull[i] = false; + } catch (Exception e) { + outV.noNulls = false; + outV.isNull[i] = true; + } + } + } else { + for (int i = 0; i < n; i++) { + try { + outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]); + outV.isNull[i] = false; + } catch (Exception e) { + outV.noNulls = false; + outV.isNull[i] = true; + } + } + } + } else { + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. + outV.noNulls = false; + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + try { + outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]); + } catch (Exception e) { + outV.isNull[i] = true; + } + } + } + } else { + for (int i = 0; i < n; i++) { + outV.isNull[i] = inputCol.isNull[i]; + if (!inputCol.isNull[i]) { + try { + outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]); + } catch (Exception e) { + outV.isNull[i] = true; + } + } + } + } + } + } + + @Override + public int getOutputColumn() { + return this.outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java new file mode 100644 index 0000000..cdb2b97 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.io.Text; + +import java.nio.charset.CharacterCodingException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; + +/** + * Return Unix Timestamp. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFUnixTimeStampString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + private transient final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + private transient final Calendar calendar = Calendar.getInstance(); + + public VectorUDFUnixTimeStampString(int colNum, int outputColumn) { + super(colNum, outputColumn); + } + + public VectorUDFUnixTimeStampString() { + super(); + } + + @Override + protected long doGetField(byte[] bytes, int start, int length) throws ParseException { + Date date = null; + try { + date = format.parse(Text.decode(bytes, start, length)); + } catch (CharacterCodingException e) { + throw new ParseException(e.getMessage(), 0); + } + calendar.setTime(date); + return calendar.getTimeInMillis() / 1000; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java new file mode 100644 index 0000000..2e72140 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearString.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.io.Text; + +import java.nio.charset.CharacterCodingException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; + +/** + * Expression to get week of year. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFWeekOfYearString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + private transient final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); + private transient final Calendar calendar = Calendar.getInstance(); + + public VectorUDFWeekOfYearString(int colNum, int outputColumn) { + super(colNum, outputColumn); + initCalendar(); + } + + public VectorUDFWeekOfYearString() { + super(); + } + + @Override + protected long doGetField(byte[] bytes, int start, int length) throws ParseException { + Date date = null; + try { + date = format.parse(Text.decode(bytes, start, length)); + } catch (CharacterCodingException e) { + throw new ParseException(e.getMessage(), 0); + } + calendar.setTime(date); + return calendar.get(Calendar.WEEK_OF_YEAR); + } + + private void initCalendar() { + /* code copied over from UDFWeekOfYear implementation */ + calendar.setFirstDayOfWeek(Calendar.MONDAY); + calendar.setMinimalDaysInFirstWeek(4); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java new file mode 100644 index 0000000..21bb6df --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearString.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.text.ParseException; + +/** + * Expression to get year as a long. + * Extends {@link VectorUDFTimestampFieldString} + */ +public final class VectorUDFYearString extends VectorUDFTimestampFieldString { + + private static final long serialVersionUID = 1L; + + public VectorUDFYearString(int colNum, int outputColumn) { + super(colNum, outputColumn); + } + + public VectorUDFYearString() { + super(); + } + + @Override + protected long doGetField(byte[] bytes, int start, int length) throws ParseException { + int year = 0; + for (int i = 0; i < 4; i++) { + char ch = (char) bytes[start + i]; + year = 10 * year + (ch - '0'); + } + return year; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index f58a24a..8684626 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -125,15 +125,16 @@ public Vectorizer() { supportedGenericUDFs.add(GenericUDFOPOr.class); supportedGenericUDFs.add(GenericUDFOPAnd.class); supportedGenericUDFs.add(GenericUDFOPEqual.class); - supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class); + supportedGenericUDFs.add(UDFLength.class); + supportedGenericUDFs.add(UDFYear.class); + supportedGenericUDFs.add(UDFMonth.class); + supportedGenericUDFs.add(UDFDayOfMonth.class); supportedGenericUDFs.add(UDFHour.class); - supportedGenericUDFs.add(UDFLength.class); supportedGenericUDFs.add(UDFMinute.class); supportedGenericUDFs.add(UDFSecond.class); - supportedGenericUDFs.add(UDFYear.class); supportedGenericUDFs.add(UDFWeekOfYear.class); - supportedGenericUDFs.add(UDFDayOfMonth.class); + supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class); supportedGenericUDFs.add(UDFLike.class); supportedGenericUDFs.add(UDFRegExp.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java index 55eb81b..20add85 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfMonth.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDayOfMonthString; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; @@ -42,7 +43,7 @@ + "'yyyy-MM-dd'.\n" + "Example:\n " + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " 30") -@VectorizedExpressions({VectorUDFDayOfMonthLong.class}) +@VectorizedExpressions({VectorUDFDayOfMonthLong.class, VectorUDFDayOfMonthString.class}) public class UDFDayOfMonth extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private final Calendar calendar = Calendar.getInstance(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java index 912b266..155dc29 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourString; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -43,7 +44,7 @@ + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 12\n" + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 12") -@VectorizedExpressions({VectorUDFHourLong.class}) +@VectorizedExpressions({VectorUDFHourLong.class, VectorUDFHourString.class}) public class UDFHour extends UDF { private final SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private final SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java index 430d43c..5755adb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteString; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -43,7 +44,7 @@ + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 58\n" + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 58") -@VectorizedExpressions({VectorUDFMinuteLong.class}) +@VectorizedExpressions({VectorUDFMinuteLong.class, VectorUDFMinuteString.class}) public class UDFMinute extends UDF { private final SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private final SimpleDateFormat formatter2 = new SimpleDateFormat("HH:mm:ss"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java index 71d9513..4e34dbf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java @@ -25,6 +25,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthString; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; @@ -38,6 +41,7 @@ value = "_FUNC_(date) - Returns the month of date", extended = "Example:\n" + " > SELECT _FUNC_('2009-30-07') FROM src LIMIT 1;\n" + " 7") +@VectorizedExpressions({VectorUDFMonthLong.class, VectorUDFMonthString.class}) public class UDFMonth extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private final Calendar calendar = Calendar.getInstance(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java index d672cb6..0292931 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondString; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -43,7 +44,7 @@ + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 59\n" + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 59") -@VectorizedExpressions({VectorUDFSecondLong.class}) +@VectorizedExpressions({VectorUDFSecondLong.class, VectorUDFSecondString.class}) public class UDFSecond extends UDF { private final SimpleDateFormat formatter1 = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java index 224aa0c..f076d1d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearString; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; @@ -43,7 +44,7 @@ + " > SELECT _FUNC_('2008-02-20') FROM src LIMIT 1;\n" + " 8\n" + " > SELECT _FUNC_('1980-12-31 12:59:59') FROM src LIMIT 1;\n" + " 1") -@VectorizedExpressions({VectorUDFWeekOfYearLong.class}) +@VectorizedExpressions({VectorUDFWeekOfYearLong.class, VectorUDFWeekOfYearString.class}) public class UDFWeekOfYear extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private final Calendar calendar = Calendar.getInstance(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java index 9656a65..1853860 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearString; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.IntWritable; @@ -42,7 +43,7 @@ + "'yyyy-MM-dd'.\n" + "Example:\n " + " > SELECT _FUNC_('2009-30-07', 1) FROM src LIMIT 1;\n" + " 2009") -@VectorizedExpressions({VectorUDFYearLong.class}) +@VectorizedExpressions({VectorUDFYearLong.class, VectorUDFYearString.class}) public class UDFYear extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private final Calendar calendar = Calendar.getInstance(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java index d5c4527..dc259c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampString; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; @@ -42,7 +43,7 @@ @Description(name = "to_unix_timestamp", value = "_FUNC_(date[, pattern]) - Returns the UNIX timestamp", extended = "Converts the specified time to number of seconds since 1970-01-01.") -@VectorizedExpressions({VectorUDFUnixTimeStampLong.class}) +@VectorizedExpressions({VectorUDFUnixTimeStampLong.class, VectorUDFUnixTimeStampString.class}) public class GenericUDFToUnixTimeStamp extends GenericUDF { private transient StringObjectInspector intputTextOI; diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q new file mode 100644 index 0000000..cb9220d --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q @@ -0,0 +1,85 @@ +SET hive.vectorized.execution.enabled = true; + +-- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. + +CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC; + +INSERT OVERWRITE TABLE alltypesorc_string +SELECT + to_utc_timestamp(ctimestamp1, 'America/Los_Angeles'), + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) +FROM alltypesorc +LIMIT 40; + +EXPLAIN SELECT + to_unix_timestamp(ctimestamp1), + year(ctimestamp1), + month(ctimestamp1), + day(ctimestamp1), + dayofmonth(ctimestamp1), + weekofyear(ctimestamp1), + hour(ctimestamp1), + minute(ctimestamp1), + second(ctimestamp1) +FROM alltypesorc_string; + +SELECT + to_unix_timestamp(ctimestamp1), + year(ctimestamp1), + month(ctimestamp1), + day(ctimestamp1), + dayofmonth(ctimestamp1), + weekofyear(ctimestamp1), + hour(ctimestamp1), + minute(ctimestamp1), + second(ctimestamp1) +FROM alltypesorc_string; + +EXPLAIN SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_string; + +SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_string; + +EXPLAIN SELECT + to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1), + year(ctimestamp1) = year(stimestamp1), + month(ctimestamp1) = month(stimestamp1), + day(ctimestamp1) = day(stimestamp1), + dayofmonth(ctimestamp1) = dayofmonth(stimestamp1), + weekofyear(ctimestamp1) = weekofyear(stimestamp1), + hour(ctimestamp1) = hour(stimestamp1), + minute(ctimestamp1) = minute(stimestamp1), + second(ctimestamp1) = second(stimestamp1) +FROM alltypesorc_string; + +-- Should all be true or NULL +SELECT + to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1), + year(ctimestamp1) = year(stimestamp1), + month(ctimestamp1) = month(stimestamp1), + day(ctimestamp1) = day(stimestamp1), + dayofmonth(ctimestamp1) = dayofmonth(stimestamp1), + weekofyear(ctimestamp1) = weekofyear(stimestamp1), + hour(ctimestamp1) = hour(stimestamp1), + minute(ctimestamp1) = minute(stimestamp1), + second(ctimestamp1) = second(stimestamp1) +FROM alltypesorc_string; diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out new file mode 100644 index 0000000..2e9097c --- /dev/null +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -0,0 +1,469 @@ +PREHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. + +CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. + +CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alltypesorc_string +PREHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string +SELECT + to_utc_timestamp(ctimestamp1, 'America/Los_Angeles'), + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) +FROM alltypesorc +LIMIT 40 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string +SELECT + to_utc_timestamp(ctimestamp1, 'America/Los_Angeles'), + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) +FROM alltypesorc +LIMIT 40 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +PREHOOK: query: EXPLAIN SELECT + to_unix_timestamp(ctimestamp1), + year(ctimestamp1), + month(ctimestamp1), + day(ctimestamp1), + dayofmonth(ctimestamp1), + weekofyear(ctimestamp1), + hour(ctimestamp1), + minute(ctimestamp1), + second(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + to_unix_timestamp(ctimestamp1), + year(ctimestamp1), + month(ctimestamp1), + day(ctimestamp1), + dayofmonth(ctimestamp1), + weekofyear(ctimestamp1), + hour(ctimestamp1), + minute(ctimestamp1), + second(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_string))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION day (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION hour (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION minute (TOK_TABLE_OR_COL ctimestamp1))) (TOK_SELEXPR (TOK_FUNCTION second (TOK_TABLE_OR_COL ctimestamp1)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypesorc_string + TableScan + alias: alltypesorc_string + Select Operator + expressions: + expr: to_unix_timestamp(ctimestamp1) + type: bigint + expr: year(ctimestamp1) + type: int + expr: month(ctimestamp1) + type: int + expr: day(ctimestamp1) + type: int + expr: dayofmonth(ctimestamp1) + type: int + expr: weekofyear(ctimestamp1) + type: int + expr: hour(ctimestamp1) + type: int + expr: minute(ctimestamp1) + type: int + expr: second(ctimestamp1) + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT + to_unix_timestamp(ctimestamp1), + year(ctimestamp1), + month(ctimestamp1), + day(ctimestamp1), + dayofmonth(ctimestamp1), + weekofyear(ctimestamp1), + hour(ctimestamp1), + minute(ctimestamp1), + second(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + to_unix_timestamp(ctimestamp1), + year(ctimestamp1), + month(ctimestamp1), + day(ctimestamp1), + dayofmonth(ctimestamp1), + weekofyear(ctimestamp1), + hour(ctimestamp1), + minute(ctimestamp1), + second(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +28786 1969 12 31 31 1 23 59 46 +NULL NULL NULL NULL NULL NULL NULL NULL NULL +28795 1969 12 31 31 1 23 59 55 +28784 1969 12 31 31 1 23 59 44 +28790 1969 12 31 31 1 23 59 50 +28815 1970 1 1 1 1 0 0 15 +28807 1970 1 1 1 1 0 0 7 +28804 1970 1 1 1 1 0 0 4 +28792 1969 12 31 31 1 23 59 52 +28784 1969 12 31 31 1 23 59 44 +28784 1969 12 31 31 1 23 59 44 +28805 1970 1 1 1 1 0 0 5 +28814 1970 1 1 1 1 0 0 14 +28813 1970 1 1 1 1 0 0 13 +28787 1969 12 31 31 1 23 59 47 +28809 1970 1 1 1 1 0 0 9 +28792 1969 12 31 31 1 23 59 52 +28800 1970 1 1 1 1 0 0 0 +28805 1970 1 1 1 1 0 0 5 +28798 1969 12 31 31 1 23 59 58 +28789 1969 12 31 31 1 23 59 49 +28811 1970 1 1 1 1 0 0 11 +28808 1970 1 1 1 1 0 0 8 +28802 1970 1 1 1 1 0 0 2 +28804 1970 1 1 1 1 0 0 4 +28784 1969 12 31 31 1 23 59 44 +28789 1969 12 31 31 1 23 59 49 +28792 1969 12 31 31 1 23 59 52 +28807 1970 1 1 1 1 0 0 7 +28795 1969 12 31 31 1 23 59 55 +28800 1970 1 1 1 1 0 0 0 +28798 1969 12 31 31 1 23 59 58 +28807 1970 1 1 1 1 0 0 7 +28795 1969 12 31 31 1 23 59 55 +28785 1969 12 31 31 1 23 59 45 +28788 1969 12 31 31 1 23 59 48 +28803 1970 1 1 1 1 0 0 3 +28808 1970 1 1 1 1 0 0 8 +28792 1969 12 31 31 1 23 59 52 +28806 1970 1 1 1 1 0 0 6 +PREHOOK: query: EXPLAIN SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_string))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION year (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION month (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION day (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION hour (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION minute (TOK_TABLE_OR_COL stimestamp1))) (TOK_SELEXPR (TOK_FUNCTION second (TOK_TABLE_OR_COL stimestamp1)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypesorc_string + TableScan + alias: alltypesorc_string + Select Operator + expressions: + expr: to_unix_timestamp(stimestamp1) + type: bigint + expr: year(stimestamp1) + type: int + expr: month(stimestamp1) + type: int + expr: day(stimestamp1) + type: int + expr: dayofmonth(stimestamp1) + type: int + expr: weekofyear(stimestamp1) + type: int + expr: hour(stimestamp1) + type: int + expr: minute(stimestamp1) + type: int + expr: second(stimestamp1) + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + to_unix_timestamp(stimestamp1), + year(stimestamp1), + month(stimestamp1), + day(stimestamp1), + dayofmonth(stimestamp1), + weekofyear(stimestamp1), + hour(stimestamp1), + minute(stimestamp1), + second(stimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +28786 1969 12 31 31 1 23 59 46 +NULL NULL NULL NULL NULL NULL NULL NULL NULL +28795 1969 12 31 31 1 23 59 55 +28784 1969 12 31 31 1 23 59 44 +28790 1969 12 31 31 1 23 59 50 +28815 1970 1 1 1 1 0 0 15 +28807 1970 1 1 1 1 0 0 7 +28804 1970 1 1 1 1 0 0 4 +28792 1969 12 31 31 1 23 59 52 +28784 1969 12 31 31 1 23 59 44 +28784 1969 12 31 31 1 23 59 44 +28805 1970 1 1 1 1 0 0 5 +28814 1970 1 1 1 1 0 0 14 +28813 1970 1 1 1 1 0 0 13 +28787 1969 12 31 31 1 23 59 47 +28809 1970 1 1 1 1 0 0 9 +28792 1969 12 31 31 1 23 59 52 +28800 1970 1 1 1 1 0 0 0 +28805 1970 1 1 1 1 0 0 5 +28798 1969 12 31 31 1 23 59 58 +28789 1969 12 31 31 1 23 59 49 +28811 1970 1 1 1 1 0 0 11 +28808 1970 1 1 1 1 0 0 8 +28802 1970 1 1 1 1 0 0 2 +28804 1970 1 1 1 1 0 0 4 +28784 1969 12 31 31 1 23 59 44 +28789 1969 12 31 31 1 23 59 49 +28792 1969 12 31 31 1 23 59 52 +28807 1970 1 1 1 1 0 0 7 +28795 1969 12 31 31 1 23 59 55 +28800 1970 1 1 1 1 0 0 0 +28798 1969 12 31 31 1 23 59 58 +28807 1970 1 1 1 1 0 0 7 +28795 1969 12 31 31 1 23 59 55 +28785 1969 12 31 31 1 23 59 45 +28788 1969 12 31 31 1 23 59 48 +28803 1970 1 1 1 1 0 0 3 +28808 1970 1 1 1 1 0 0 8 +28792 1969 12 31 31 1 23 59 52 +28806 1970 1 1 1 1 0 0 6 +PREHOOK: query: EXPLAIN SELECT + to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1), + year(ctimestamp1) = year(stimestamp1), + month(ctimestamp1) = month(stimestamp1), + day(ctimestamp1) = day(stimestamp1), + dayofmonth(ctimestamp1) = dayofmonth(stimestamp1), + weekofyear(ctimestamp1) = weekofyear(stimestamp1), + hour(ctimestamp1) = hour(stimestamp1), + minute(ctimestamp1) = minute(stimestamp1), + second(ctimestamp1) = second(stimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1), + year(ctimestamp1) = year(stimestamp1), + month(ctimestamp1) = month(stimestamp1), + day(ctimestamp1) = day(stimestamp1), + dayofmonth(ctimestamp1) = dayofmonth(stimestamp1), + weekofyear(ctimestamp1) = weekofyear(stimestamp1), + hour(ctimestamp1) = hour(stimestamp1), + minute(ctimestamp1) = minute(stimestamp1), + second(ctimestamp1) = second(stimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc_string))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (= (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION to_unix_timestamp (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION year (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION year (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION month (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION month (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION day (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION day (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION dayofmonth (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION weekofyear (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION hour (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION hour (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION minute (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION minute (TOK_TABLE_OR_COL stimestamp1)))) (TOK_SELEXPR (= (TOK_FUNCTION second (TOK_TABLE_OR_COL ctimestamp1)) (TOK_FUNCTION second (TOK_TABLE_OR_COL stimestamp1))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypesorc_string + TableScan + alias: alltypesorc_string + Select Operator + expressions: + expr: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) + type: boolean + expr: (year(ctimestamp1) = year(stimestamp1)) + type: boolean + expr: (month(ctimestamp1) = month(stimestamp1)) + type: boolean + expr: (day(ctimestamp1) = day(stimestamp1)) + type: boolean + expr: (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) + type: boolean + expr: (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) + type: boolean + expr: (hour(ctimestamp1) = hour(stimestamp1)) + type: boolean + expr: (minute(ctimestamp1) = minute(stimestamp1)) + type: boolean + expr: (second(ctimestamp1) = second(stimestamp1)) + type: boolean + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- Should all be true or NULL +SELECT + to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1), + year(ctimestamp1) = year(stimestamp1), + month(ctimestamp1) = month(stimestamp1), + day(ctimestamp1) = day(stimestamp1), + dayofmonth(ctimestamp1) = dayofmonth(stimestamp1), + weekofyear(ctimestamp1) = weekofyear(stimestamp1), + hour(ctimestamp1) = hour(stimestamp1), + minute(ctimestamp1) = minute(stimestamp1), + second(ctimestamp1) = second(stimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: -- Should all be true or NULL +SELECT + to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1), + year(ctimestamp1) = year(stimestamp1), + month(ctimestamp1) = month(stimestamp1), + day(ctimestamp1) = day(stimestamp1), + dayofmonth(ctimestamp1) = dayofmonth(stimestamp1), + weekofyear(ctimestamp1) = weekofyear(stimestamp1), + hour(ctimestamp1) = hour(stimestamp1), + minute(ctimestamp1) = minute(stimestamp1), + second(ctimestamp1) = second(stimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ] +true true true true true true true true true +NULL NULL NULL NULL NULL NULL NULL NULL NULL +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true