diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnColumn.txt
new file mode 100644
index 0000000..e8ef279
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnColumn.txt
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.<ColumnVectorType>;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second and third are columns or column expression results.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private final int arg1Column;
+  private final int arg2Column;
+  private final int arg3Column;
+
+  public <ClassName>(int arg1Column, int arg2Column, int arg3Column,
+      int outputColumnNum) {
+    super(outputColumnNum);
+    this.arg1Column = arg1Column;
+    this.arg2Column = arg2Column;
+    this.arg3Column = arg3Column;
+  }
+
+  public <ClassName>() {
+    super();
+
+    // Dummy final assignments.
+    arg1Column = -1;
+    arg2Column = -1;
+    arg3Column = -1;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    <ColumnVectorType> arg2ColVector = (<ColumnVectorType>) batch.cols[arg2Column];
+    boolean[] arg2IsNull = arg2ColVector.isNull;
+    <ColumnVectorType> arg3ColVector = (<ColumnVectorType>) batch.cols[arg3Column];
+    boolean[] arg3IsNull = arg3ColVector.isNull;
+    <ColumnVectorType> outputColVector = (<ColumnVectorType>) batch.cols[outputColumnNum];
+    int[] sel = batch.selected;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    int n = batch.size;
+    long[] vector1 = arg1ColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    /* All the code paths below propagate nulls even if neither arg2 nor arg3
+     * have nulls. This is to reduce the number of code paths and shorten the
+     * code, at the expense of maybe doing unnecessary work if neither input
+     * has nulls. This could be improved in the future by expanding the number
+     * of code paths.
+     */
+    if (arg1ColVector.isRepeating) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+        arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+      } else {
+        arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+      }
+      return;
+    }
+
+    // extend any repeating values and noNulls indicator in the inputs
+    arg2ColVector.flatten(batch.selectedInUse, sel, n);
+    arg3ColVector.flatten(batch.selectedInUse, sel, n);
+
+    if (arg1ColVector.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (vector1[i] == 1) {
+            if (!arg2IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          } else {
+            if (!arg3IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          if (vector1[i] == 1) {
+            if (!arg2IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          } else {
+            if (!arg3IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        }
+      }
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+            if (!arg2IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          } else {
+            if (!arg3IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+            if (!arg2IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          } else {
+            if (!arg3IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        }
+      }
+    }
+
+    // restore repeating and no nulls indicators
+    arg2ColVector.unFlatten();
+    arg3ColVector.unFlatten();
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + ", " +
+        getColumnParamString(2, arg3Column);
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(3)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("int_family"),
+            VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"),
+            VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnScalar.txt
new file mode 100644
index 0000000..56ae2ca
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectColumnScalar.txt
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.<ColumnVectorType>;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import <ScalarImport>;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second is a column or non-constant expression result.
+ * The third is a constant value.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private final int arg1Column;
+  private final int arg2Column;
+  private final <ScalarType> arg3Scalar;
+
+  public <ClassName>(int arg1Column, int arg2Column, <ScalarType> arg3Scalar,
+      int outputColumnNum) {
+    super(outputColumnNum);
+    this.arg1Column = arg1Column;
+    this.arg2Column = arg2Column;
+    this.arg3Scalar = arg3Scalar;
+  }
+
+  public <ClassName>() {
+    super();
+
+    // Dummy final assignments.
+    arg1Column = -1;
+    arg2Column = -1;
+    arg3Scalar = null;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    <ColumnVectorType> arg2ColVector = (<ColumnVectorType>) batch.cols[arg2Column];
+    boolean[] arg2IsNull = arg2ColVector.isNull;
+    <ColumnVectorType> outputColVector = (<ColumnVectorType>) batch.cols[outputColumnNum];
+    int[] sel = batch.selected;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    int n = batch.size;
+    long[] vector1 = arg1ColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (arg1ColVector.isRepeating) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+        arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+      } else {
+        outputColVector.fill(arg3Scalar);
+      }
+      return;
+    }
+
+    // Extend any repeating values and noNulls indicator in the inputs to
+    // reduce the number of code paths needed below.
+    arg2ColVector.flatten(batch.selectedInUse, sel, n);
+
+    if (arg1ColVector.noNulls) {
+
+      // FUTURE: We could check arg2ColVector.noNulls and optimize these loops.
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (vector1[i] == 1) {
+            if (!arg2IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          } else {
+            outputIsNull[i] = false;
+            outputColVector.set(i, arg3Scalar);
+          }
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          if (vector1[i] == 1) {
+            if (!arg2IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          } else {
+            outputIsNull[i] = false;
+            outputColVector.set(i, arg3Scalar);
+          }
+        }
+      }
+    } else /* there are nulls */ {
+
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+            if (!arg2IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          } else {
+            outputIsNull[i] = false;
+            outputColVector.set(i, arg3Scalar);
+          }
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+            if (!arg2IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg2ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          } else {
+            outputIsNull[i] = false;
+            outputColVector.set(i, arg3Scalar);
+          }
+        }
+      }
+    }
+
+    // restore repeating and no nulls indicators
+    arg2ColVector.unFlatten();
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) +
+        ", val "+ arg3Scalar;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(3)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("int_family"),
+            VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"),
+            VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarColumn.txt
new file mode 100644
index 0000000..271b589
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarColumn.txt
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.<ColumnVectorType>;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import <ScalarImport>;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second is a constant value.
+ * The third is a column or non-constant expression result.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private final int arg1Column;
+  private final <ScalarType> arg2Scalar;
+  private final int arg3Column;
+
+  public <ClassName>(int arg1Column, <ScalarType> arg2Scalar, int arg3Column,
+      int outputColumnNum) {
+    super(outputColumnNum);
+    this.arg1Column = arg1Column;
+    this.arg2Scalar = arg2Scalar;
+    this.arg3Column = arg3Column;
+  }
+
+  public <ClassName>() {
+    super();
+
+    // Dummy final assignments.
+    arg1Column = -1;
+    arg2Scalar = null;
+    arg3Column = -1;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    <ColumnVectorType> arg3ColVector = (<ColumnVectorType>) batch.cols[arg3Column];
+    boolean[] arg3IsNull = arg3ColVector.isNull;
+    <ColumnVectorType> outputColVector = (<ColumnVectorType>) batch.cols[outputColumnNum];
+    int[] sel = batch.selected;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    int n = batch.size;
+    long[] vector1 = arg1ColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (arg1ColVector.isRepeating) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+        outputColVector.fill(arg2Scalar);
+      } else {
+        arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+      }
+      return;
+    }
+
+    // Extend any repeating values and noNulls indicator in the inputs to
+    // reduce the number of code paths needed below.
+    // This could be optimized in the future by having separate paths
+    // for when arg3ColVector is repeating or has no nulls.
+    arg3ColVector.flatten(batch.selectedInUse, sel, n);
+
+    if (arg1ColVector.noNulls) {
+
+      // FUTURE: We could check arg3ColVector.noNulls and optimize these loops.
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (vector1[i] == 1) {
+            outputIsNull[i] = false;
+            outputColVector.set(i, arg2Scalar);
+          } else {
+            if (!arg3IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          if (vector1[i] == 1) {
+            outputIsNull[i] = false;
+            outputColVector.set(i, arg2Scalar);
+          } else {
+            if (!arg3IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        }
+      }
+    } else /* there are nulls */ {
+
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+            outputIsNull[i] = false;
+            outputColVector.set(i, arg2Scalar);
+          } else {
+            if (!arg3IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          if (!arg1ColVector.isNull[i] && vector1[i] == 1) {
+            outputIsNull[i] = false;
+            outputColVector.set(i, arg2Scalar);
+          } else {
+            if (!arg3IsNull[i]) {
+              outputIsNull[i] = false;
+              outputColVector.set(i, arg3ColVector.asScratch<ObjectName>(i));
+            } else {
+              outputIsNull[i] = true;
+              outputColVector.noNulls = false;
+            }
+          }
+        }
+      }
+    }
+
+    // restore repeating and no nulls indicators
+    arg3ColVector.unFlatten();
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return getColumnParamString(0, arg1Column) + ", val "+ arg2Scalar + ", " +
+        getColumnParamString(2, arg3Column);
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(3)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("int_family"),
+            VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"),
+            VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarScalar.txt
new file mode 100644
index 0000000..10f97df
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IfExprObjectScalarScalar.txt
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.<ColumnVectorType>;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import <ScalarImport>;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second is a constant value.
+ * The third is a constant value.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private final int arg1Column;
+  private final <ScalarType> arg2Scalar;
+  private final <ScalarType> arg3Scalar;
+
+  public <ClassName>(int arg1Column, <ScalarType> arg2Scalar, <ScalarType> arg3Scalar,
+      int outputColumnNum) {
+    super(outputColumnNum);
+    this.arg1Column = arg1Column;
+    this.arg2Scalar = arg2Scalar;
+    this.arg3Scalar = arg3Scalar;
+  }
+
+  public <ClassName>() {
+    super();
+
+    // Dummy final assignments.
+    arg1Column = -1;
+    arg2Scalar = null;
+    arg3Scalar = null;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    <ColumnVectorType> outputColVector = (<ColumnVectorType>) batch.cols[outputColumnNum];
+    int[] sel = batch.selected;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    int n = batch.size;
+    long[] vector1 = arg1ColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (arg1ColVector.isRepeating) {
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+        outputColVector.fill(arg2Scalar);
+      } else {
+        outputColVector.fill(arg3Scalar);
+      }
+      return;
+    }
+
+    /*
+     * Since we always set a value, make sure all isNull entries are set to false.
+     */
+
+    if (arg1ColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+          }
+        }
+      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+        }
+      }
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputIsNull[i] = false;
+          outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              arg2Scalar : arg3Scalar);
+        }
+      } else {
+        Arrays.fill(outputIsNull, 0, n, false);
+        for(int i = 0; i != n; i++) {
+          outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              arg2Scalar : arg3Scalar);
+        }
+      }
+    }
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return getColumnParamString(0, arg1Column) + ", val "+ arg2Scalar + ", val "+ arg3Scalar;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(3)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("int_family"),
+            VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"),
+            VectorExpressionDescriptor.ArgumentType.getType("<TypeName>"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 2cad04b..491a6b1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -390,9 +390,6 @@ public DataTypePhysicalVariation getDataTypePhysicalVariation(int columnNum) thr
     if (initialDataTypePhysicalVariations == null) {
       return null;
     }
-    if (columnNum < 0) {
-      fake++;
-    }
     if (columnNum < initialDataTypePhysicalVariations.size()) {
       return initialDataTypePhysicalVariations.get(columnNum);
     }
@@ -1682,8 +1679,6 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd
     return vectorExpression;
   }
 
-  static int fake = 0;
-
   private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf,
       Class<?> udfClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode,
       TypeInfo returnType) throws HiveException {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
index 75de7a0..2a10e29 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
@@ -60,6 +60,7 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
     }
 
     LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    boolean[] arg1IsNull = arg1ColVector.isNull;
     LongColumnVector arg2ColVector = (LongColumnVector) batch.cols[arg2Column];
     LongColumnVector arg3ColVector = (LongColumnVector) batch.cols[arg3Column];
     LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
@@ -87,7 +88,7 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
      * of code paths.
      */
     if (arg1ColVector.isRepeating) {
-      if (vector1[0] == 1) {
+      if ((arg1ColVector.noNulls || !arg1IsNull[0]) && vector1[0] == 1) {
         arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
       } else {
         arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
@@ -121,14 +122,14 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException {
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+          outputVector[i] = (!arg1IsNull[i] && vector1[i] == 1 ?
              vector2[i] : vector3[i]);
           outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
             arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
         }
       } else {
         for(int i = 0; i != n; i++) {
-          outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+          outputVector[i] = (!arg1IsNull[i] && vector1[i] == 1 ?
             vector2[i] : vector3[i]);
           outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
             arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
index 90ff765..142dd1b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java
@@ -41,10 +41,10 @@
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeColumnColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeColumnScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeScalarColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeScalarScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeColumnColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeColumnScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeScalarColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprIntervalDayTimeScalarScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar;
@@ -52,10 +52,10 @@
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringGroupColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarCharScalar;
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index 551bb9e..791ac82 100644
---
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -47,10 +47,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampColumnScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprTimestampScalarScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java new file mode 100644 index 0000000..8de247c --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomBatchSource.java @@ -0,0 +1,311 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hive.ql.metadata.HiveException; + + +/** + * Generate random batch source from a random Object[] row source (VectorRandomRowSource). + */ +public class VectorRandomBatchSource { + + // Divide up rows array into different sized batches. + // Modify the rows array for isRepeating / NULL patterns. + // Provide iterator that will fill up a VRB with the divided up rows. 
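+  //
+  // Illustrative usage only (a sketch, not part of this patch): assuming a
+  // VectorRandomRowSource 'rowSource' initialized for some schema, and a
+  // VectorizedRowBatch 'batch' created from a matching VectorizedRowBatchCtx:
+  //
+  //   VectorRandomBatchSource batchSource =
+  //       VectorRandomBatchSource.createInterestingBatches(
+  //           new Random(12882), rowSource, rowSource.randomRows(100000),
+  //           new VectorRandomBatchSource.VectorRandomBatchParameters());
+  //   batchSource.resetBatchIteration();
+  //   while (batchSource.fillNextBatch(batch)) {
+  //     // evaluate a VectorExpression against 'batch' and verify the outputs
+  //   }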
+ + private final VectorRandomRowSource vectorRandomRowSource; + + private final Object[][] randomRows; + + private final int rowCount; + private final int columnCount; + + private final VectorBatchPatterns vectorBatchPatterns; + + private VectorAssignRow vectorAssignRow; + + private int nextRowIndex; + private int batchCount; + + private VectorRandomBatchSource( + VectorRandomRowSource vectorRandomRowSource, + Object[][] randomRows, + VectorBatchPatterns vectorBatchPatterns, + VectorAssignRow vectorAssignRow) { + this.vectorRandomRowSource = vectorRandomRowSource; + this.randomRows = randomRows; + rowCount = randomRows.length; + Object[] firstRow = randomRows[0]; + columnCount = firstRow.length; + this.vectorBatchPatterns = vectorBatchPatterns; + this.vectorAssignRow = vectorAssignRow; + } + + public static class VectorRandomBatchParameters { + } + + private static class VectorBatchPatterns { + + private final List vectorBatchPatternList; + + VectorBatchPatterns(List vectorBatchPatternList) { + this.vectorBatchPatternList = vectorBatchPatternList; + } + + List getTectorBatchPatternList() { + return vectorBatchPatternList; + } + } + + private static class VectorBatchPattern { + + final int batchSize; + final BitSet bitSet; + + private VectorBatchPattern(int batchSize, BitSet bitSet) { + this.batchSize = batchSize; + this.bitSet = bitSet; + } + + public static VectorBatchPattern createRegularBatch(int batchSize) { + return new VectorBatchPattern(batchSize, null); + } + + public static VectorBatchPattern createRepeatedBatch(int batchSize, BitSet bitSet) { + return new VectorBatchPattern(batchSize, bitSet); + } + + public int getBatchSize() { + return batchSize; + } + + public BitSet getBitSet() { + return bitSet; + } + + public String toString() { + String batchSizeString = "batchSize " + Integer.toString(batchSize); + if (bitSet == null) { + return batchSizeString; + } + long bitMask = bitSet.toLongArray()[0]; + return batchSizeString + " repeating 0x" + Long.toHexString(bitMask); + } + } + + private static VectorBatchPatterns chooseBatchPatterns( + Random random, + VectorRandomRowSource vectorRandomRowSource, + Object[][] randomRows) { + + List vectorBatchPatternList = new ArrayList(); + final int rowCount = randomRows.length; + int rowIndex = 0; + + if (rowCount > 0) { + + final int columnCount = randomRows[0].length; + + // Choose first up to a full batch. + final int regularBatchSize = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); + vectorBatchPatternList.add(VectorBatchPattern.createRegularBatch(regularBatchSize)); + rowIndex += regularBatchSize; + + // Have a non-NULL value on hand. + Object[] nonNullRow = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + for (int r = 0; r < rowCount; r++) { + Object object = randomRows[r][c]; + if (object != null) { + nonNullRow[c] = object; + break; + } + } + } + + int columnPermutationLimit = Math.min(columnCount, Long.SIZE); + + // Repeated NULL permutations. 
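+      // For example, with columnCount == 3 the masks tried below are 1, 2, and 3;
+      // mask 3 (binary 11) produces one batch in which columns 0 and 1 each
+      // repeat NULL for every row of that batch.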
+ long columnPermutation = 1; + while (true) { + if (columnPermutation > columnPermutationLimit) { + break; + } + final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); + if (maximumRowCount == 0) { + break; + } + int randomRowCount = 1 + random.nextInt(maximumRowCount); + final int rowLimit = rowIndex + randomRowCount; + + BitSet bitSet = BitSet.valueOf(new long[]{columnPermutation}); + + for (int columnNum = bitSet.nextSetBit(0); + columnNum >= 0; + columnNum = bitSet.nextSetBit(columnNum + 1)) { + + // Repeated NULL fill down column. + for (int r = rowIndex; r < rowLimit; r++) { + randomRows[r][columnNum] = null; + } + } + vectorBatchPatternList.add(VectorBatchPattern.createRepeatedBatch(randomRowCount, bitSet)); + columnPermutation++; + rowIndex = rowLimit; + } + + // Repeated non-NULL permutations. + columnPermutation = 1; + while (true) { + if (columnPermutation > columnPermutationLimit) { + break; + } + final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); + if (maximumRowCount == 0) { + break; + } + int randomRowCount = 1 + random.nextInt(maximumRowCount); + final int rowLimit = rowIndex + randomRowCount; + + BitSet bitSet = BitSet.valueOf(new long[]{columnPermutation}); + + for (int columnNum = bitSet.nextSetBit(0); + columnNum >= 0; + columnNum = bitSet.nextSetBit(columnNum + 1)) { + + // Repeated non-NULL fill down column. + Object repeatedObject = randomRows[rowIndex][columnNum]; + if (repeatedObject == null) { + repeatedObject = nonNullRow[columnNum]; + } + for (int r = rowIndex; r < rowLimit; r++) { + randomRows[r][columnNum] = repeatedObject; + } + } + vectorBatchPatternList.add(VectorBatchPattern.createRepeatedBatch(randomRowCount, bitSet)); + columnPermutation++; + rowIndex = rowLimit; + } + + // Remaining batches. + while (true) { + final int maximumRowCount = Math.min(rowCount - rowIndex, VectorizedRowBatch.DEFAULT_SIZE); + if (maximumRowCount == 0) { + break; + } + int randomRowCount = 1 + random.nextInt(maximumRowCount); + vectorBatchPatternList.add(VectorBatchPattern.createRegularBatch(randomRowCount)); + rowIndex += randomRowCount; + } + } + + // System.out.println("*DEBUG* vectorBatchPatternList" + vectorBatchPatternList.toString()); + + return new VectorBatchPatterns(vectorBatchPatternList); + } + + public static VectorRandomBatchSource createInterestingBatches( + Random random, + VectorRandomRowSource vectorRandomRowSource, + Object[][] randomRows, + VectorRandomBatchParameters vectorRandomBatchParameters) + throws HiveException { + + VectorAssignRow vectorAssignRow = new VectorAssignRow(); + vectorAssignRow.init(vectorRandomRowSource.typeNames()); + + VectorBatchPatterns vectorBatchPatterns = + chooseBatchPatterns(random, vectorRandomRowSource, randomRows); + + return new VectorRandomBatchSource( + vectorRandomRowSource, randomRows, vectorBatchPatterns, vectorAssignRow); + } + + public VectorRandomRowSource getRowSource() { + return vectorRandomRowSource; + } + + public Object[][] getRandomRows() { + return randomRows; + } + + public void resetBatchIteration() { + nextRowIndex = 0; + batchCount = 0; + } + + public int getBatchCount() { + return batchCount; + } + + public int getRowCount() { + return rowCount; + } + + /* + * Patterns of isRepeating columns + * For boolean: tri-state: null, 0, 1 + * For others: null, some-value + * noNulls: sometimes false and there are no NULLs. + * Random selectedInUse, too. 
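+   *
+   * For a boolean first column this means a repeating batch can arrive in one of
+   * three states (repeating NULL, repeating 0, repeating 1), which is exactly
+   * what the IF-expression NULL handling changed by this patch must cope with.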
+ */ + public boolean fillNextBatch(VectorizedRowBatch batch) { + if (nextRowIndex >= rowCount) { + return false; + } + + VectorBatchPattern vectorBatchPattern = + vectorBatchPatterns.getTectorBatchPatternList().get(batchCount); + final int batchSize = vectorBatchPattern.getBatchSize(); + + for (int c = 0; c < columnCount; c++) { + batch.cols[c].reset(); + } + + BitSet bitSet = vectorBatchPattern.getBitSet(); + if (bitSet != null) { + for (int columnNum = bitSet.nextSetBit(0); + columnNum >= 0; + columnNum = bitSet.nextSetBit(columnNum + 1)) { + batch.cols[columnNum].isRepeating = true; + } + } + + int rowIndex = nextRowIndex; + for (int batchIndex = 0; batchIndex < batchSize; batchIndex++) { + for (int c = 0; c < columnCount; c++) { + if (batch.cols[c].isRepeating && batchIndex > 0) { + continue; + } + vectorAssignRow.assignRowColumn(batch, batchIndex, c, randomRows[rowIndex][c]); + } + rowIndex++; + } + batch.size = batchSize; + batchCount++; + nextRowIndex += batchSize; + return true; + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index 3f99328..fa5c775 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -24,7 +24,9 @@ import java.util.HashSet; import java.util.List; import java.util.Random; +import java.util.Set; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -32,6 +34,7 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.RandomTypeUtil; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -71,6 +74,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; @@ -96,6 +100,8 @@ private TypeInfo[] typeInfos; + private DataTypePhysicalVariation[] dataTypePhysicalVariations; + private List objectInspectorList; // Primitive. 
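Note on the new dataTypePhysicalVariations field below: DECIMAL_64 is the variation that stores a decimal of precision 18 or less as a scaled long. A hypothetical illustration (only HiveDecimal, HiveDecimalWritable, and serialize64 come from this patch; the values are ours):

    HiveDecimal dec = HiveDecimal.create("12.34");
    long decimal64 = new HiveDecimalWritable(dec).serialize64(2);  // scale 2 -> 1234L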
@@ -127,6 +133,10 @@ return typeInfos; } + public DataTypePhysicalVariation[] dataTypePhysicalVariations() { + return dataTypePhysicalVariations; + } + public PrimitiveCategory[] primitiveCategories() { return primitiveCategories; } @@ -163,7 +173,22 @@ public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth) { public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth, boolean allowNull) { this.r = r; this.allowNull = allowNull; - chooseSchema(supportedTypes, maxComplexDepth); + chooseSchema(supportedTypes, null, null, null, maxComplexDepth); + } + + public void init(Random r, Set allowedTypeNameSet, int maxComplexDepth, boolean allowNull) { + this.r = r; + this.allowNull = allowNull; + chooseSchema(SupportedTypes.ALL, allowedTypeNameSet, null, null, maxComplexDepth); + } + + public void initExplicitSchema(Random r, List explicitTypeNameList, int maxComplexDepth, + boolean allowNull, List explicitDataTypePhysicalVariationList) { + this.r = r; + this.allowNull = allowNull; + chooseSchema( + SupportedTypes.ALL, null, explicitTypeNameList, explicitDataTypePhysicalVariationList, + maxComplexDepth); } /* @@ -180,7 +205,7 @@ public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth, b "float", "double", "string", -// "char", + "char", "varchar", "binary", "date", @@ -197,27 +222,30 @@ public void init(Random r, SupportedTypes supportedTypes, int maxComplexDepth, b "map" }; - private String getRandomTypeName(SupportedTypes supportedTypes) { + private String getRandomTypeName(SupportedTypes supportedTypes, Set allowedTypeNameSet) { String typeName = null; - if (r.nextInt(10 ) != 0) { - typeName = possibleHivePrimitiveTypeNames[r.nextInt(possibleHivePrimitiveTypeNames.length)]; - } else { - switch (supportedTypes) { - case PRIMITIVES: + do { + if (r.nextInt(10 ) != 0) { typeName = possibleHivePrimitiveTypeNames[r.nextInt(possibleHivePrimitiveTypeNames.length)]; - break; - case ALL_EXCEPT_MAP: - typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length - 1)]; - break; - case ALL: - typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length)]; - break; + } else { + switch (supportedTypes) { + case PRIMITIVES: + typeName = possibleHivePrimitiveTypeNames[r.nextInt(possibleHivePrimitiveTypeNames.length)]; + break; + case ALL_EXCEPT_MAP: + typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length - 1)]; + break; + case ALL: + typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length)]; + break; + } } - } + } while (allowedTypeNameSet != null && !allowedTypeNameSet.contains(typeName)); return typeName; } - private String getDecoratedTypeName(String typeName, SupportedTypes supportedTypes, int depth, int maxDepth) { + private String getDecoratedTypeName(String typeName, SupportedTypes supportedTypes, + Set allowedTypeNameSet, int depth, int maxDepth) { depth++; if (depth < maxDepth) { supportedTypes = SupportedTypes.PRIMITIVES; @@ -229,23 +257,32 @@ private String getDecoratedTypeName(String typeName, SupportedTypes supportedTyp final int maxLength = 1 + r.nextInt(100); typeName = String.format("varchar(%d)", maxLength); } else if (typeName.equals("decimal")) { - typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); + typeName = + String.format( + "decimal(%d,%d)", + HiveDecimal.SYSTEM_DEFAULT_PRECISION, + HiveDecimal.SYSTEM_DEFAULT_SCALE); } else if 
(typeName.equals("array")) { - String elementTypeName = getRandomTypeName(supportedTypes); - elementTypeName = getDecoratedTypeName(elementTypeName, supportedTypes, depth, maxDepth); + String elementTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet); + elementTypeName = + getDecoratedTypeName(elementTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); typeName = String.format("array<%s>", elementTypeName); } else if (typeName.equals("map")) { - String keyTypeName = getRandomTypeName(SupportedTypes.PRIMITIVES); - keyTypeName = getDecoratedTypeName(keyTypeName, supportedTypes, depth, maxDepth); - String valueTypeName = getRandomTypeName(supportedTypes); - valueTypeName = getDecoratedTypeName(valueTypeName, supportedTypes, depth, maxDepth); + String keyTypeName = getRandomTypeName(SupportedTypes.PRIMITIVES, allowedTypeNameSet); + keyTypeName = + getDecoratedTypeName(keyTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); + String valueTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet); + valueTypeName = + getDecoratedTypeName(valueTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); typeName = String.format("map<%s,%s>", keyTypeName, valueTypeName); } else if (typeName.equals("struct")) { final int fieldCount = 1 + r.nextInt(10); final StringBuilder sb = new StringBuilder(); for (int i = 0; i < fieldCount; i++) { - String fieldTypeName = getRandomTypeName(supportedTypes); - fieldTypeName = getDecoratedTypeName(fieldTypeName, supportedTypes, depth, maxDepth); + String fieldTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet); + fieldTypeName = + getDecoratedTypeName( + fieldTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); if (i > 0) { sb.append(","); } @@ -260,8 +297,10 @@ private String getDecoratedTypeName(String typeName, SupportedTypes supportedTyp final int fieldCount = 1 + r.nextInt(10); final StringBuilder sb = new StringBuilder(); for (int i = 0; i < fieldCount; i++) { - String fieldTypeName = getRandomTypeName(supportedTypes); - fieldTypeName = getDecoratedTypeName(fieldTypeName, supportedTypes, depth, maxDepth); + String fieldTypeName = getRandomTypeName(supportedTypes, allowedTypeNameSet); + fieldTypeName = + getDecoratedTypeName( + fieldTypeName, supportedTypes, allowedTypeNameSet, depth, maxDepth); if (i > 0) { sb.append(","); } @@ -273,14 +312,29 @@ private String getDecoratedTypeName(String typeName, SupportedTypes supportedTyp } private ObjectInspector getObjectInspector(TypeInfo typeInfo) { + return getObjectInspector(typeInfo, DataTypePhysicalVariation.NONE); + } + + private ObjectInspector getObjectInspector(TypeInfo typeInfo, + DataTypePhysicalVariation dataTypePhysicalVariation) { + final ObjectInspector objectInspector; switch (typeInfo.getCategory()) { case PRIMITIVE: { - final PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) typeInfo; - objectInspector = - PrimitiveObjectInspectorFactory. - getPrimitiveWritableObjectInspector(primitiveType); + final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + if (primitiveTypeInfo instanceof DecimalTypeInfo && + dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { + objectInspector = + PrimitiveObjectInspectorFactory. + getPrimitiveWritableObjectInspector( + TypeInfoFactory.longTypeInfo); + } else { + objectInspector = + PrimitiveObjectInspectorFactory. 
+ getPrimitiveWritableObjectInspector( + primitiveTypeInfo); + } } break; case MAP: @@ -341,35 +395,50 @@ private ObjectInspector getObjectInspector(TypeInfo typeInfo) { return objectInspector; } - private void chooseSchema(SupportedTypes supportedTypes, int maxComplexDepth) { - HashSet hashSet = null; + private void chooseSchema(SupportedTypes supportedTypes, Set allowedTypeNameSet, + List explicitTypeNameList, + List explicitDataTypePhysicalVariationList, + int maxComplexDepth) { + HashSet hashSet = null; final boolean allTypes; - final boolean onlyOne = (r.nextInt(100) == 7); - if (onlyOne) { - columnCount = 1; + final boolean onlyOne; + if (explicitTypeNameList != null) { + columnCount = explicitTypeNameList.size(); + allTypes = false; + onlyOne = false; + } else if (allowedTypeNameSet != null) { + columnCount = 1 + r.nextInt(20); allTypes = false; + onlyOne = false; } else { - allTypes = r.nextBoolean(); - if (allTypes) { - switch (supportedTypes) { - case ALL: - columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length; - break; - case ALL_EXCEPT_MAP: - columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1; - break; - case PRIMITIVES: - columnCount = possibleHivePrimitiveTypeNames.length; - break; - } - hashSet = new HashSet(); + onlyOne = (r.nextInt(100) == 7); + if (onlyOne) { + columnCount = 1; + allTypes = false; } else { - columnCount = 1 + r.nextInt(20); + allTypes = r.nextBoolean(); + if (allTypes) { + switch (supportedTypes) { + case ALL: + columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length; + break; + case ALL_EXCEPT_MAP: + columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1; + break; + case PRIMITIVES: + columnCount = possibleHivePrimitiveTypeNames.length; + break; + } + hashSet = new HashSet(); + } else { + columnCount = 1 + r.nextInt(20); + } } } typeNames = new ArrayList(columnCount); categories = new Category[columnCount]; typeInfos = new TypeInfo[columnCount]; + dataTypePhysicalVariations = new DataTypePhysicalVariation[columnCount]; objectInspectorList = new ArrayList(columnCount); primitiveCategories = new PrimitiveCategory[columnCount]; @@ -379,9 +448,13 @@ private void chooseSchema(SupportedTypes supportedTypes, int maxComplexDepth) { for (int c = 0; c < columnCount; c++) { columnNames.add(String.format("col%d", c)); final String typeName; + DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE; - if (onlyOne) { - typeName = getRandomTypeName(supportedTypes); + if (explicitTypeNameList != null) { + typeName = explicitTypeNameList.get(c); + dataTypePhysicalVariation = explicitDataTypePhysicalVariationList.get(c); + } else if (onlyOne || allowedTypeNameSet != null) { + typeName = getRandomTypeName(supportedTypes, allowedTypeNameSet); } else { int typeNum; if (allTypes) { @@ -425,7 +498,8 @@ private void chooseSchema(SupportedTypes supportedTypes, int maxComplexDepth) { } - String decoratedTypeName = getDecoratedTypeName(typeName, supportedTypes, 0, maxComplexDepth); + String decoratedTypeName = + getDecoratedTypeName(typeName, supportedTypes, allowedTypeNameSet, 0, maxComplexDepth); final TypeInfo typeInfo; try { @@ -435,15 +509,14 @@ private void chooseSchema(SupportedTypes supportedTypes, int maxComplexDepth) { } typeInfos[c] = typeInfo; + dataTypePhysicalVariations[c] = dataTypePhysicalVariation; final Category category = typeInfo.getCategory(); categories[c] = category; - 
ObjectInspector objectInspector = getObjectInspector(typeInfo); + ObjectInspector objectInspector = getObjectInspector(typeInfo, dataTypePhysicalVariation); switch (category) { case PRIMITIVE: { final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; - objectInspector = PrimitiveObjectInspectorFactory. - getPrimitiveWritableObjectInspector(primitiveTypeInfo); primitiveTypeInfos[c] = primitiveTypeInfo; PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); primitiveCategories[c] = primitiveCategory; @@ -498,27 +571,46 @@ private void chooseSchema(SupportedTypes supportedTypes, int maxComplexDepth) { } public Object[] randomPrimitiveRow(int columnCount) { - return randomPrimitiveRow(columnCount, r, primitiveTypeInfos); + return randomPrimitiveRow(columnCount, r, primitiveTypeInfos, dataTypePhysicalVariations); } public static Object[] randomPrimitiveRow(int columnCount, Random r, - PrimitiveTypeInfo[] primitiveTypeInfos) { + PrimitiveTypeInfo[] primitiveTypeInfos, + DataTypePhysicalVariation[] dataTypePhysicalVariations) { final Object row[] = new Object[columnCount]; for (int c = 0; c < columnCount; c++) { - row[c] = randomPrimitiveObject(r, primitiveTypeInfos[c]); + row[c] = randomPrimitiveObject(r, primitiveTypeInfos[c], dataTypePhysicalVariations[c]); } return row; } public static Object[] randomWritablePrimitiveRow(int columnCount, Random r, PrimitiveTypeInfo[] primitiveTypeInfos) { + return randomWritablePrimitiveRow(columnCount, r, primitiveTypeInfos, null); + } + + public static Object[] randomWritablePrimitiveRow(int columnCount, Random r, + PrimitiveTypeInfo[] primitiveTypeInfos, + DataTypePhysicalVariation[] dataTypePhysicalVariations) { final Object row[] = new Object[columnCount]; for (int c = 0; c < columnCount; c++) { final PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[c]; - final ObjectInspector objectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo); + final DataTypePhysicalVariation dataTypePhysicalVariation = + (dataTypePhysicalVariations != null ? 
+ dataTypePhysicalVariations[c] : DataTypePhysicalVariation.NONE); + final ObjectInspector objectInspector; + if (primitiveTypeInfo instanceof DecimalTypeInfo && + dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { + objectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + TypeInfoFactory.longTypeInfo); + } else { + objectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + primitiveTypeInfo); + } final Object object = randomPrimitiveObject(r, primitiveTypeInfo); row[c] = getWritablePrimitiveObject(primitiveTypeInfo, objectInspector, object); } @@ -575,6 +667,14 @@ public void sort(Object[][] rows) { public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeInfo, ObjectInspector objectInspector, Object object) { + return + getWritablePrimitiveObject( + primitiveTypeInfo, objectInspector, DataTypePhysicalVariation.NONE, object); + } + + public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeInfo, + ObjectInspector objectInspector, DataTypePhysicalVariation dataTypePhysicalVariation, + Object object) { switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: @@ -596,17 +696,17 @@ public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeI case STRING: return ((WritableStringObjectInspector) objectInspector).create((String) object); case CHAR: - { - WritableHiveCharObjectInspector writableCharObjectInspector = - new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); - return writableCharObjectInspector.create((HiveChar) object); - } + { + WritableHiveCharObjectInspector writableCharObjectInspector = + new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); + return writableCharObjectInspector.create((HiveChar) object); + } case VARCHAR: - { - WritableHiveVarcharObjectInspector writableVarcharObjectInspector = - new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); - return writableVarcharObjectInspector.create((HiveVarchar) object); - } + { + WritableHiveVarcharObjectInspector writableVarcharObjectInspector = + new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); + return writableVarcharObjectInspector.create((HiveVarchar) object); + } case BINARY: return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create((byte[]) object); case TIMESTAMP: @@ -616,31 +716,55 @@ public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeI case INTERVAL_DAY_TIME: return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create((HiveIntervalDayTime) object); case DECIMAL: - { - WritableHiveDecimalObjectInspector writableDecimalObjectInspector = - new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); - return writableDecimalObjectInspector.create((HiveDecimal) object); - } + { + if (dataTypePhysicalVariation == dataTypePhysicalVariation.DECIMAL_64) { + final long value; + if (object instanceof HiveDecimal) { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; + value = new HiveDecimalWritable((HiveDecimal) object).serialize64( + decimalTypeInfo.getScale()); + } else { + value = (long) object; + } + return ((WritableLongObjectInspector) objectInspector).create(value); + } else { + WritableHiveDecimalObjectInspector writableDecimalObjectInspector = + new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); + return writableDecimalObjectInspector.create((HiveDecimal) 
object); + } + } default: throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory()); } } public Object randomWritable(int column) { - return randomWritable(typeInfos[column], objectInspectorList.get(column)); + return randomWritable( + typeInfos[column], objectInspectorList.get(column), dataTypePhysicalVariations[column], + allowNull); } public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector) { - return randomWritable(typeInfo, objectInspector, allowNull); + return randomWritable(typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull); + } + + public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, + boolean allowNull) { + return randomWritable(typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull); } - public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, boolean allowNull) { + public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, + DataTypePhysicalVariation dataTypePhysicalVariation, boolean allowNull) { switch (typeInfo.getCategory()) { case PRIMITIVE: { + if (allowNull && r.nextInt(20) == 0) { + return null; + } final Object object = randomPrimitiveObject(r, (PrimitiveTypeInfo) typeInfo); - return getWritablePrimitiveObject((PrimitiveTypeInfo) typeInfo, objectInspector, object); + return getWritablePrimitiveObject( + (PrimitiveTypeInfo) typeInfo, objectInspector, dataTypePhysicalVariation, object); } case LIST: { @@ -780,6 +904,11 @@ public Object randomPrimitiveObject(int column) { } public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitiveTypeInfo) { + return randomPrimitiveObject(r, primitiveTypeInfo, DataTypePhysicalVariation.NONE); + } + + public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitiveTypeInfo, + DataTypePhysicalVariation dataTypePhysicalVariation) { switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: @@ -813,9 +942,14 @@ public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitive case INTERVAL_DAY_TIME: return getRandIntervalDayTime(r); case DECIMAL: - { - return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); - } + { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; + HiveDecimal hiveDecimal = getRandHiveDecimal(r, decimalTypeInfo); + if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { + return new HiveDecimalWritable(hiveDecimal).serialize64(decimalTypeInfo.getScale()); + } + return hiveDecimal; + } default: throw new Error("Unknown primitive category " + primitiveTypeInfo.getCategory()); } @@ -869,7 +1003,13 @@ public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTy sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale)); } - return HiveDecimal.create(sb.toString()); + HiveDecimal dec = HiveDecimal.create(sb.toString()); + dec = + HiveDecimal.enforcePrecisionScale( + dec, decimalTypeInfo.getPrecision(), decimalTypeInfo.getScale()); + if (dec != null) { + return dec; + } } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java new file mode 100644 index 0000000..c52ca19 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java @@ -0,0 +1,444 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.LongWritable; + +import junit.framework.Assert; + +import org.junit.Test; + +public class TestVectorIfStatement { + + @Test + public void testBoolean() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "boolean"); + } + + @Test + public void testInt() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "int"); + } + + @Test + public void testBigInt() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "bigint"); + } + + @Test + public void testString() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "string"); + } + + @Test + public void testTimestamp() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "timestamp"); + } + + @Test + public void testDate() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "date"); + } + + @Test + public void testIntervalDayTime() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "interval_day_time"); + } + + @Test + public void testIntervalYearMonth() throws Exception { + Random random = 
new Random(12882); + + doIfTests(random, "interval_year_month"); + } + + @Test + public void testDouble() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "double"); + } + + @Test + public void testChar() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "char(10)"); + } + + @Test + public void testVarchar() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "varchar(15)"); + } + + @Test + public void testBinary() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "binary"); + } + + @Test + public void testDecimalLarge() throws Exception { + Random random = new Random(9300); + + doIfTests(random, "decimal(20,8)"); + } + + @Test + public void testDecimalSmall() throws Exception { + Random random = new Random(12882); + + doIfTests(random, "decimal(10,4)"); + } + + public enum IfStmtTestMode { + ROW_MODE, + ADAPTOR_WHEN, + VECTOR_EXPRESSION; + + static final int count = values().length; + } + + public enum ColumnScalarMode { + COLUMN_COLUMN, + COLUMN_SCALAR, + SCALAR_COLUMN, + SCALAR_SCALAR; + + static final int count = values().length; + } + + private void doIfTests(Random random, String typeName) + throws Exception { + doIfTests(random, typeName, DataTypePhysicalVariation.NONE); + } + + private void doIfTests(Random random, String typeName, + DataTypePhysicalVariation dataTypePhysicalVariation) + throws Exception { + for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { + doIfTestsWithDiffColumnScalar( + random, typeName, columnScalarMode, dataTypePhysicalVariation); + } + } + + private void doIfTestsWithDiffColumnScalar(Random random, String typeName, + ColumnScalarMode columnScalarMode, DataTypePhysicalVariation dataTypePhysicalVariation) + throws Exception { + + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + + boolean isDecimal64 = (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64); + final int decimal64Scale = + (isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0); + + List<String> explicitTypeNameList = new ArrayList<String>(); + List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = + new ArrayList<DataTypePhysicalVariation>(); + explicitTypeNameList.add("boolean"); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + if (columnScalarMode != ColumnScalarMode.SCALAR_SCALAR) { + explicitTypeNameList.add(typeName); + explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation); + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN) { + explicitTypeNameList.add(typeName); + explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation); + } + } + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initExplicitSchema( + random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true, + explicitDataTypePhysicalVariationList); + + List<String> columns = new ArrayList<String>(); + columns.add("col0"); // The boolean predicate.
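+ + // An illustrative aside on why decimal64Scale above matters (a sketch, not part of the + // production code path): DECIMAL_64 carries a decimal as a long scaled by the type's scale, + // so for decimal(10,4): + // long encoded = new HiveDecimalWritable(HiveDecimal.create("12345.6789")).serialize64(4); // 123456789L + // HiveDecimalWritable decoded = new HiveDecimalWritable(0); + // decoded.deserialize64(encoded, 4); // holds 12345.6789 again + // The result comparison later in this method applies the same deserialize64 step to + // row-mode LongWritable results before comparing them against the vectorized output.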
+ + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Boolean.class, "col0", "table", false); + int columnNum = 1; + ExprNodeDesc col2Expr; + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { + String columnName = "col" + (columnNum++); + col2Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false); + columns.add(columnName); + } else { + Object scalar1Object = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) typeInfo); + col2Expr = new ExprNodeConstantDesc(typeInfo, scalar1Object); + } + ExprNodeDesc col3Expr; + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { + String columnName = "col" + (columnNum++); + col3Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false); + columns.add(columnName); + } else { + Object scalar2Object = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) typeInfo); + col3Expr = new ExprNodeConstantDesc(typeInfo, scalar2Object); + } + + List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); + children.add(col1Expr); + children.add(col2Expr); + children.add(col3Expr); + + //---------------------------------------------------------------------------------------------- + + String[] columnNames = columns.toArray(new String[0]); + + String[] outputScratchTypeNames = new String[] { typeName }; + DataTypePhysicalVariation[] outputDataTypePhysicalVariations = + new DataTypePhysicalVariation[] { dataTypePhysicalVariation }; + + VectorizedRowBatchCtx batchContext = + new VectorizedRowBatchCtx( + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + outputScratchTypeNames, + outputDataTypePhysicalVariations); + + Object[][] randomRows = rowSource.randomRows(100000); + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + rowSource, + randomRows, + null); + + final int rowCount = randomRows.length; + Object[][] resultObjectsArray = new Object[IfStmtTestMode.count][]; + for (int i = 0; i < IfStmtTestMode.count; i++) { + + Object[] resultObjects = new Object[rowCount]; + resultObjectsArray[i] = resultObjects; + + IfStmtTestMode ifStmtTestMode = IfStmtTestMode.values()[i]; + switch (ifStmtTestMode) { + case ROW_MODE: + doRowIfTest( + typeInfo, columns, children, randomRows, rowSource.rowStructObjectInspector(), + resultObjects); + break; + case ADAPTOR_WHEN: + case VECTOR_EXPRESSION: + doVectorIfTest( + typeInfo, + columns, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + children, + ifStmtTestMode, + columnScalarMode, + batchSource, + batchContext, + resultObjects); + break; + default: + throw new RuntimeException("Unexpected IF statement test mode " + ifStmtTestMode); + } + } + + for (int i = 0; i < rowCount; i++) { + // Row-mode is the expected value.
+ Object expectedResult = resultObjectsArray[0][i]; + + for (int v = 1; v < IfStmtTestMode.count; v++) { + Object vectorResult = resultObjectsArray[v][i]; + if (expectedResult == null || vectorResult == null) { + if (expectedResult != null || vectorResult != null) { + Assert.fail( + "Row " + i + " " + IfStmtTestMode.values()[v] + + " " + columnScalarMode + + " result is NULL " + (vectorResult == null) + + " does not match row-mode expected result is NULL " + (expectedResult == null)); + } + } else { + + if (isDecimal64 && expectedResult instanceof LongWritable) { + + HiveDecimalWritable expectedHiveDecimalWritable = new HiveDecimalWritable(0); + expectedHiveDecimalWritable.deserialize64( + ((LongWritable) expectedResult).get(), decimal64Scale); + expectedResult = expectedHiveDecimalWritable; + } + + if (!expectedResult.equals(vectorResult)) { + Assert.fail( + "Row " + i + " " + IfStmtTestMode.values()[v] + + " " + columnScalarMode + + " result " + vectorResult.toString() + + " (" + vectorResult.getClass().getSimpleName() + ")" + + " does not match row-mode expected result " + expectedResult.toString() + + " (" + expectedResult.getClass().getSimpleName() + ")"); + } + } + } + } + } + + private void doRowIfTest(TypeInfo typeInfo, List<String> columns, List<ExprNodeDesc> children, + Object[][] randomRows, ObjectInspector rowInspector, Object[] resultObjects) throws Exception { + + GenericUDF udf = new GenericUDFIf(); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc(typeInfo, udf, children); + HiveConf hiveConf = new HiveConf(); + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); + evaluator.initialize(rowInspector); + + final int rowCount = randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = randomRows[i]; + Object result = evaluator.evaluate(row); + resultObjects[i] = result; + } + } + + private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, + VectorExtractRow resultVectorExtractRow, Object[] scratchRow, Object[] resultObjects) { + // UNDONE: selectedInUse + for (int i = 0; i < batch.size; i++) { + resultVectorExtractRow.extractRow(batch, i, scratchRow); + + // UNDONE: Need to copy the object. + resultObjects[rowIndex++] = scratchRow[0]; + } + } + + private void doVectorIfTest(TypeInfo typeInfo, + List<String> columns, + TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, + List<ExprNodeDesc> children, + IfStmtTestMode ifStmtTestMode, ColumnScalarMode columnScalarMode, + VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext, + Object[] resultObjects) + throws Exception { + + GenericUDF udf; + switch (ifStmtTestMode) { + case VECTOR_EXPRESSION: + udf = new GenericUDFIf(); + break; + case ADAPTOR_WHEN: + udf = new GenericUDFWhen(); + break; + default: + throw new RuntimeException("Unexpected IF statement test mode " + ifStmtTestMode); + } + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc(typeInfo, udf, children); + + String ifExprMode = (ifStmtTestMode != IfStmtTestMode.VECTOR_EXPRESSION ? 
"adaptor" : "good"); + HiveConf hiveConf = new HiveConf(); + hiveConf.setVar(HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE, ifExprMode); + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc); + + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); + resultVectorExtractRow.init(new TypeInfo[] { typeInfo }, new int[] { columns.size() }); + Object[] scrqtchRow = new Object[1]; + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " ifStmtTestMode " + ifStmtTestMode + + " columnScalarMode " + columnScalarMode + + " vectorExpression " + vectorExpression.getClass().getSimpleName()); + */ + + batchSource.resetBatchIteration(); + int rowIndex = 0; + while (true) { + if (!batchSource.fillNextBatch(batch)) { + break; + } + vectorExpression.evaluate(batch); + extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, resultObjects); + rowIndex += batch.size; + } + } +} diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q index c3236c9..d53fa1e 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q @@ -1,23 +1,42 @@ --! qt:dataset:alltypesorc + set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.fetch.task.conversion=none; +SET hive.vectorized.execution.enabled = false; + -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. -- Turning on vectorization has been temporarily moved after filling the test table -- due to bug HIVE-8197. 
+-- SORT_QUERY_RESULTS -CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC; +CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, + ctimestamp2 timestamp) STORED AS ORC; INSERT OVERWRITE TABLE alltypesorc_string SELECT + cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, + ctimestamp2 FROM alltypesorc ORDER BY toutc, cst LIMIT 40; - -SET hive.vectorized.execution.enabled = true; +INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939'); +INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null); +INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null); +INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183'); +INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null); +INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null); +INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778'); +INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101'); +INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null); + +INSERT INTO TABLE alltypesorc_string values + (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), + (true, null, '1985-11-18 16:37:54.0', null), + (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989'); CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC; @@ -25,6 +44,8 @@ INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1; +SET hive.vectorized.execution.enabled = true; + EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), @@ -34,7 +55,15 @@ EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1; @@ -47,7 +76,15 @@ SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1; diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index c9dd434..79ba4c6 100644 
--- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -1,15 +1,19 @@ -PREHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC +PREHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, + ctimestamp2 timestamp) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC +POSTHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, + ctimestamp2 timestamp) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@alltypesorc_string PREHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT + cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, + ctimestamp2 FROM alltypesorc ORDER BY toutc, cst LIMIT 40 @@ -18,16 +22,146 @@ PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@alltypesorc_string POSTHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT + cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, + ctimestamp2 FROM alltypesorc ORDER BY toutc, cst LIMIT 40 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) +POSTHOOK: type: QUERY 
+POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string 
values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values + (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), + (true, null, '1985-11-18 16:37:54.0', null), + (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values + (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), + (true, null, '1985-11-18 16:37:54.0', null), + (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] PREHOOK: query: CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -72,7 +206,15 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string 
ORDER BY c1 PREHOOK: type: QUERY @@ -85,7 +227,15 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY @@ -109,18 +259,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 4356 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator - expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), if(cboolean1, ctimestamp1, TIMESTAMP'1319-02-02 16:31:57.778') (type: timestamp), if(cboolean1, TIMESTAMP'2000-12-18 08:42:30.0005', ctimestamp1) (type: timestamp), if(cboolean1, ctimestamp1, ctimestamp2) (type: timestamp), if(cboolean1, ctimestamp1, null) (type: timestamp), if(cboolean1, null, ctimestamp2) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] - selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 4:int, VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 5:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 7:int, VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 8:int, VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 9:int, VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 10:int, VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 11:int - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, 3, 14, 15, 16, 17, 18] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 9:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field 
WEEK_OF_YEAR) -> 10:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 11:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 12:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 13:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-02-02 16:31:57.778) -> 14:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 08:42:30.0005, col 1:timestamp) -> 15:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestampcol 3:timestamp) -> 16:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 17:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 18:timestamp + Statistics: Num rows: 52 Data size: 16836 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -128,8 +278,8 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) + Statistics: Num rows: 52 Data size: 16836 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -151,19 +301,19 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int), VALUE._col8 (type: boolean), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (type: timestamp), VALUE._col15 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 52 Data size: 16836 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false 
File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 16836 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -184,7 +334,15 @@ PREHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -199,52 +357,72 @@ POSTHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL +-45479000681 528 10 27 27 43 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL +1632478712 2021 9 24 24 
38 3 18 32 NULL 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 NULL 1974-10-04 17:21:03.989 +1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 NULL 1999-10-03 16:59:10.396903939 +1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 NULL 2010-04-08 02:43:35.861742727 +1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 NULL 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 NULL NULL NULL +163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 NULL 1966-08-16 13:36:50.183 +163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 NULL 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 NULL NULL NULL +490725011 1985 7 20 20 29 9 30 11 true 1985-07-20 09:30:11 1319-02-02 16:31:57.778 1985-07-20 09:30:11 2000-12-18 08:42:30.0005 1985-07-20 09:30:11 1985-07-20 09:30:11 NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:44.028 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:44.028 NULL 1969-12-31 15:59:44.028 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:44.809 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:44.809 NULL 1969-12-31 15:59:44.809 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:50.531 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:50.531 NULL 1969-12-31 15:59:50.531 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:51.009 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:51.009 NULL 1969-12-31 15:59:51.009 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:53.761 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:53.761 NULL 1969-12-31 15:59:53.761 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:00.905 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:00.905 NULL 1969-12-31 16:00:00.905 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:03.586 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:03.586 NULL 1969-12-31 16:00:03.586 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:05.227 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:05.227 NULL 1969-12-31 16:00:05.227 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:05.535 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:05.535 NULL 1969-12-31 16:00:05.535 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.02 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.02 NULL 1969-12-31 16:00:07.02 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.365 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.365 NULL 1969-12-31 16:00:07.365 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.517 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.517 NULL 1969-12-31 16:00:07.517 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.767 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.767 NULL 1969-12-31 16:00:07.767 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:08.602 1319-02-02 16:31:57.778 NULL 
1969-12-31 16:00:08.602 NULL 1969-12-31 16:00:08.602 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:09.938 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:09.938 NULL 1969-12-31 16:00:09.938 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:14.214 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:14.214 NULL 1969-12-31 16:00:14.214 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:14.783 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:14.783 NULL 1969-12-31 16:00:14.783 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:43.773 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:44.262 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:44.568 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:47.351 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:47.446 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:48.023 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:48.629 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:49.177 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:49.208 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:50.789 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:51.245 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:52.372 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:55.249 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.661 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.784 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:01.836 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.313 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.538 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.986 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:11.031 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:11.465 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:13.589 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true 
NULL 2024-11-11 16:42:41.101 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL NULL NULL 2000-12-18 08:42:30.0005 NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -291,7 +469,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1017 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -300,9 +478,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] - selectExpressions: VectorUDFUnixTimeStampString(col 1:string) -> 3:bigint, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 7:int, VectorUDFWeekOfYearString(col 1:string) -> 8:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 9:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 10:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 11:int - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] + selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int + Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -310,7 +488,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -339,13 +517,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator 
native: false - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -387,6 +565,18 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### +-2736243926 1883 4 17 17 16 4 14 34 +-62018170411 4 9 22 22 39 18 26 29 +1365579826 2013 4 10 10 15 0 43 46 +206731024925 8521 1 16 16 3 20 42 5 +271201265 1978 8 5 5 31 14 41 5 +501208674 1985 11 18 18 47 16 37 54 +501208674 1985 11 18 18 47 16 37 54 +94573848655 4966 12 4 4 49 9 30 55 +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL @@ -473,7 +663,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 1684 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 3097 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -482,9 +672,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] - selectExpressions: LongColEqualLongColumn(col 3:bigint, col 4:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFUnixTimeStampString(col 1:string) -> 4:bigint) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 3:int, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 3:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearString(col 1:string) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 3:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 4:int) -> 11:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 3:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 4:int) -> 12:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 3:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 4:int) -> 13:boolean - Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + 
projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15] + selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean + Statistics: Num rows: 52 Data size: 1872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -492,7 +682,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1872 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -521,13 +711,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1872 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1872 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -609,6 +799,18 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +false false false false false false false false false +false false false false false false false false false +false false false false false false false false false +false false false false false false false false false PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -788,7 +990,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -797,12 +999,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [1] + Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: - aggregators: VectorUDAFMinTimestamp(col 0:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 0:timestamp) -> timestamp, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFMinTimestamp(col 1:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 1:timestamp) -> timestamp, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -886,7 +1088,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL NULL 0 40 +0528-10-27 08:15:18.941718273 7160-12-02 06:00:24.81200852 8 52 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string @@ -915,7 +1117,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -924,12 +1126,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [1] + Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctimestamp1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double + aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1016,7 
+1218,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL +2.89160863229166E11 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -1059,7 +1261,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1068,13 +1270,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3, 6] - selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastTimestampToDouble(col 0:timestamp) -> 4:double, CastTimestampToDouble(col 0:timestamp) -> 5:double) -> 6:double - Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [1, 5, 8] + selectExpressions: CastTimestampToDouble(col 1:timestamp) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastTimestampToDouble(col 1:timestamp) -> 6:double, CastTimestampToDouble(col 1:timestamp) -> 7:double) -> 8:double + Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double + aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1175,4 +1377,4 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL NULL NULL NULL NULL NULL NULL NULL +3.6145107904E10 false false false 7.5245155692476E10 7.5245155692476E10 7.5245155692476E10 8.0440455033059E10 diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index 68b89a7..fe5fd23 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -1,15 +1,19 @@ -PREHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC +PREHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, + ctimestamp2 timestamp) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC +POSTHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, + ctimestamp2 timestamp) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@alltypesorc_string PREHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT + cboolean1, 
to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, + ctimestamp2 FROM alltypesorc ORDER BY toutc, cst LIMIT 40 @@ -18,16 +22,146 @@ PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@alltypesorc_string POSTHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT + cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, + ctimestamp2 FROM alltypesorc ORDER BY toutc, cst LIMIT 40 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: 
Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: 
alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values + (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), + (true, null, '1985-11-18 16:37:54.0', null), + (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values + (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), + (true, null, '1985-11-18 16:37:54.0', null), + (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] PREHOOK: query: CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -72,7 +206,15 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -85,7 +227,15 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY @@ -108,18 +258,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator - expressions: to_unix_timestamp(ctimestamp1) (type: bigint), 
year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), if(cboolean1, ctimestamp1, TIMESTAMP'1319-02-02 16:31:57.778') (type: timestamp), if(cboolean1, TIMESTAMP'2000-12-18 08:42:30.0005', ctimestamp1) (type: timestamp), if(cboolean1, ctimestamp1, ctimestamp2) (type: timestamp), if(cboolean1, ctimestamp1, null) (type: timestamp), if(cboolean1, null, ctimestamp2) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] - selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 4:int, VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 5:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 7:int, VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 8:int, VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 9:int, VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 10:int, VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 11:int - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, 3, 14, 15, 16, 17, 18] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 9:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 10:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 11:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 12:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 13:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-02-02 16:31:57.778) -> 14:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 08:42:30.0005, col 1:timestamp) -> 15:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestamp, col 3:timestamp) -> 16:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 17:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 18:timestamp + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -127,8 +277,8 @@ STAGE PLANS: className:
VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp) Execution mode: vectorized Map Vectorization: enabled: true @@ -149,19 +299,19 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int), VALUE._col8 (type: boolean), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (type: timestamp), VALUE._col15 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -182,7 +332,15 @@ PREHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -197,52 +355,72 @@ POSTHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), 
minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL +-45479000681 528 10 27 27 43 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL +1632478712 2021 9 24 24 38 3 18 32 NULL 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 NULL 1974-10-04 17:21:03.989 +1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 NULL 1999-10-03 16:59:10.396903939 +1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 NULL 2010-04-08 02:43:35.861742727 +1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 NULL 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 NULL NULL NULL +163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 NULL 1966-08-16 13:36:50.183 +163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 NULL 1319-02-02 
16:31:57.778 7160-12-02 06:00:24.81200852 NULL NULL NULL +490725011 1985 7 20 20 29 9 30 11 true 1985-07-20 09:30:11 1319-02-02 16:31:57.778 1985-07-20 09:30:11 2000-12-18 08:42:30.0005 1985-07-20 09:30:11 1985-07-20 09:30:11 NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:47.183 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:47.183 NULL 1969-12-31 15:59:47.183 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:52.843 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:52.843 NULL 1969-12-31 15:59:52.843 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:53.087 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:53.087 NULL 1969-12-31 15:59:53.087 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:53.55 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:53.55 NULL 1969-12-31 15:59:53.55 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:54.042 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:54.042 NULL 1969-12-31 15:59:54.042 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:54.686 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:54.686 NULL 1969-12-31 15:59:54.686 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:58.459 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:58.459 NULL 1969-12-31 15:59:58.459 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:00.889 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:00.889 NULL 1969-12-31 16:00:00.889 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:01.258 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:01.258 NULL 1969-12-31 16:00:01.258 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:05.698 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:05.698 NULL 1969-12-31 16:00:05.698 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:08.602 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:08.602 NULL 1969-12-31 16:00:08.602 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:14.214 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:14.214 NULL 1969-12-31 16:00:14.214 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:15.466 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:15.466 NULL 1969-12-31 16:00:15.466 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:46.123 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:49.989 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:51.119 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:52.961 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:52.967 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:53.593 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:53.641 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:55.407 NULL 2000-12-18 08:42:30.0005 NULL NULL 
NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:55.439 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:56.031 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:57.719 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:58.636 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.176 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.423 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.477 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.93 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:01.839 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:02.13 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:03.151 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:03.756 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:06.134 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:07.209 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:10.361 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:11.525 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:13.589 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:13.839 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:15.601 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 2024-11-11 16:42:41.101 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL NULL NULL 2000-12-18 08:42:30.0005 NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -288,7 +466,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -297,9 +475,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] - selectExpressions: VectorUDFUnixTimeStampString(col 1:string) -> 3:bigint, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, 
fieldLength 2) -> 7:int, VectorUDFWeekOfYearString(col 1:string) -> 8:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 9:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 10:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 11:int - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] + selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -307,7 +485,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized Map Vectorization: @@ -335,13 +513,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -383,6 +561,18 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### +-2736243926 1883 4 17 17 16 4 14 34 +-62018170411 4 9 22 22 39 18 26 29 +1365579826 2013 4 10 10 15 0 43 46 +206731024925 8521 1 16 16 3 20 42 5 +271201265 1978 8 5 5 31 14 41 5 +501208674 1985 11 18 18 47 16 37 54 +501208674 1985 11 18 18 47 16 37 54 +94573848655 4966 12 4 4 49 9 30 55 +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL @@ -468,7 +658,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - 
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -477,9 +667,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] - selectExpressions: LongColEqualLongColumn(col 3:bigint, col 4:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFUnixTimeStampString(col 1:string) -> 4:bigint) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 3:int, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 3:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearString(col 1:string) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 3:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 4:int) -> 11:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 3:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 4:int) -> 12:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 3:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 4:int) -> 13:boolean - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15] + selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 
6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -487,7 +677,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized Map Vectorization: @@ -515,13 +705,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -603,6 +793,18 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +false false false false false false false false false +false false false false false false false false false +false false false false false false false false false +false false false false false false false false false PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -779,7 +981,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 
Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -788,12 +990,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [1] + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: - aggregators: VectorUDAFMinTimestamp(col 0:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 0:timestamp) -> timestamp, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFMinTimestamp(col 1:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 1:timestamp) -> timestamp, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -876,7 +1078,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL NULL 0 40 +0528-10-27 08:15:18.941718273 7160-12-02 06:00:24.81200852 8 52 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string @@ -904,7 +1106,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -913,12 +1115,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [1] + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctimestamp1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double + aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1004,7 +1206,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL +2.89160863229166E11 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -1046,7 +1248,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -1055,13 +1257,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3, 6] - selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastTimestampToDouble(col 0:timestamp) -> 4:double, CastTimestampToDouble(col 0:timestamp) -> 5:double) -> 6:double - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [1, 5, 8] + selectExpressions: CastTimestampToDouble(col 1:timestamp) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 
7:double)(children: CastTimestampToDouble(col 1:timestamp) -> 6:double, CastTimestampToDouble(col 1:timestamp) -> 7:double) -> 8:double + Statistics: Num rows: 52 Data size: 3555 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double + aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1161,4 +1363,4 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL NULL NULL NULL NULL NULL NULL NULL +3.6145107904E10 false false false 7.5245155692476E10 7.5245155692476E10 7.5245155692476E10 8.0440455033059E10 diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 244aca6..01e915b 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -1,15 +1,19 @@ -PREHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC +PREHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, + ctimestamp2 timestamp) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@alltypesorc_string -POSTHOOK: query: CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC +POSTHOOK: query: CREATE TABLE alltypesorc_string(cboolean1 boolean, ctimestamp1 timestamp, stimestamp1 string, + ctimestamp2 timestamp) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@alltypesorc_string PREHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT + cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, + ctimestamp2 FROM alltypesorc ORDER BY toutc, cst LIMIT 40 @@ -18,16 +22,146 @@ PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@alltypesorc_string POSTHOOK: query: INSERT OVERWRITE TABLE alltypesorc_string SELECT + cboolean1, to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS toutc, - CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst + CAST(to_utc_timestamp(ctimestamp1, 'America/Los_Angeles') AS STRING) as cst, + ctimestamp2 FROM alltypesorc ORDER BY toutc, cst LIMIT 40 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] POSTHOOK: Lineage: 
alltypesorc_string.stimestamp1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', '1978-08-05 14:41:05.501', '1999-10-03 16:59:10.396903939') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, null, '2013-04-10 00:43:46.8547315', null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (false, '2021-09-24 03:18:32.4', null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', '0004-09-22 18:26:29.519542222', '1966-08-16 13:36:50.183') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, null, '4966-12-04 09:30:55.202', null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string 
+POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (null, '7160-12-02 06:00:24.81200852', null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '1985-07-20 09:30:11.0', '8521-01-16 20:42:05.668832', '1319-02-02 16:31:57.778') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, null, '1883-04-17 04:14:34.64776', '2024-11-11 16:42:41.101') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values (true, '0528-10-27 08:15:18.941718273', null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 EXPRESSION [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 EXPRESSION [] +PREHOOK: query: INSERT INTO TABLE alltypesorc_string values + (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), + (true, null, '1985-11-18 16:37:54.0', null), + (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: 
default@alltypesorc_string +POSTHOOK: query: INSERT INTO TABLE alltypesorc_string values + (false, '2021-09-24 03:18:32.4', '1985-11-18 16:37:54.0', '2010-04-08 02:43:35.861742727'), + (true, null, '1985-11-18 16:37:54.0', null), + (null, '2021-09-24 03:18:32.4', null, '1974-10-04 17:21:03.989') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@alltypesorc_string +POSTHOOK: Lineage: alltypesorc_string.cboolean1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp1 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.ctimestamp2 SCRIPT [] +POSTHOOK: Lineage: alltypesorc_string.stimestamp1 SCRIPT [] PREHOOK: query: CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -72,7 +206,15 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -85,7 +227,15 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY @@ -103,18 +253,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator - expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), if(cboolean1, ctimestamp1, TIMESTAMP'1319-02-02 16:31:57.778') (type: timestamp), if(cboolean1, TIMESTAMP'2000-12-18 08:42:30.0005', ctimestamp1) (type: timestamp), if(cboolean1, ctimestamp1, ctimestamp2) (type: timestamp), if(cboolean1, ctimestamp1, null) (type: timestamp), if(cboolean1, null, ctimestamp2) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Select Vectorization: className: VectorSelectOperator native: true - 
projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] - selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 4:int, VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 5:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 7:int, VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 8:int, VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 9:int, VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 10:int, VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 11:int - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, 3, 14, 15, 16, 17, 18] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 9:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 10:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 11:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 12:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 13:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-02-02 16:31:57.778) -> 14:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 08:42:30.0005, col 1:timestamp) -> 15:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestamp, col 3:timestamp) -> 16:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 17:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 18:timestamp + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -123,8 +273,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp) Execution mode: vectorized Map Vectorization: enabled: true @@ -141,12 +291,12 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: 
int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int), VALUE._col8 (type: boolean), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (type: timestamp), VALUE._col15 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -167,7 +317,15 @@ PREHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 PREHOOK: type: QUERY @@ -182,52 +340,72 @@ POSTHOOK: query: SELECT weekofyear(ctimestamp1), hour(ctimestamp1), minute(ctimestamp1), - second(ctimestamp1) + second(ctimestamp1), + cboolean1, + ctimestamp1, + ctimestamp2, + if (cboolean1, ctimestamp1, timestamp '1319-02-02 16:31:57.778'), + if (cboolean1, timestamp '2000-12-18 08:42:30.0005', ctimestamp1), + if (cboolean1, ctimestamp1, ctimestamp2), + if (cboolean1, ctimestamp1, null), + if (cboolean1, null, ctimestamp2) FROM alltypesorc_string ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL 
NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL -NULL NULL NULL NULL NULL NULL NULL NULL NULL +-45479000681 528 10 27 27 43 8 15 18 true 0528-10-27 08:15:18.941718273 NULL 0528-10-27 08:15:18.941718273 2000-12-18 08:42:30.0005 0528-10-27 08:15:18.941718273 0528-10-27 08:15:18.941718273 NULL +1632478712 2021 9 24 24 38 3 18 32 NULL 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1974-10-04 17:21:03.989 NULL 1974-10-04 17:21:03.989 +1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 1999-10-03 16:59:10.396903939 NULL 1999-10-03 16:59:10.396903939 +1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 2010-04-08 02:43:35.861742727 NULL 2010-04-08 02:43:35.861742727 +1632478712 2021 9 24 24 38 3 18 32 false 2021-09-24 03:18:32.4 NULL 1319-02-02 16:31:57.778 2021-09-24 03:18:32.4 NULL NULL NULL +163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 1966-08-16 13:36:50.183 NULL 1966-08-16 13:36:50.183 +163809612024 7160 12 2 2 48 6 0 24 NULL 7160-12-02 06:00:24.81200852 NULL 1319-02-02 16:31:57.778 7160-12-02 06:00:24.81200852 NULL NULL NULL +490725011 1985 7 20 20 29 9 30 11 true 1985-07-20 09:30:11 1319-02-02 16:31:57.778 1985-07-20 09:30:11 2000-12-18 08:42:30.0005 1985-07-20 09:30:11 1985-07-20 09:30:11 NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:44.028 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:44.028 NULL 1969-12-31 15:59:44.028 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:44.809 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:44.809 NULL 1969-12-31 15:59:44.809 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:45.949 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:45.949 NULL 1969-12-31 15:59:45.949 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:50.531 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:50.531 NULL 1969-12-31 15:59:50.531 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:51.009 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:51.009 NULL 1969-12-31 15:59:51.009 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 15:59:53.761 1319-02-02 16:31:57.778 NULL 1969-12-31 15:59:53.761 NULL 1969-12-31 15:59:53.761 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:00.905 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:00.905 NULL 1969-12-31 16:00:00.905 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:03.586 1319-02-02 16:31:57.778 
NULL 1969-12-31 16:00:03.586 NULL 1969-12-31 16:00:03.586 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:05.227 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:05.227 NULL 1969-12-31 16:00:05.227 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:05.535 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:05.535 NULL 1969-12-31 16:00:05.535 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.02 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.02 NULL 1969-12-31 16:00:07.02 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.365 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.365 NULL 1969-12-31 16:00:07.365 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.517 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.517 NULL 1969-12-31 16:00:07.517 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:07.767 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:07.767 NULL 1969-12-31 16:00:07.767 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:08.602 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:08.602 NULL 1969-12-31 16:00:08.602 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:09.938 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:09.938 NULL 1969-12-31 16:00:09.938 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:14.214 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:14.214 NULL 1969-12-31 16:00:14.214 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL 1969-12-31 16:00:14.783 1319-02-02 16:31:57.778 NULL 1969-12-31 16:00:14.783 NULL 1969-12-31 16:00:14.783 +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL false NULL NULL 1319-02-02 16:31:57.778 NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:43.773 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:44.262 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:44.568 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:45.697 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:47.351 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:47.446 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:48.023 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:48.629 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:49.177 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:49.208 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:50.789 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:51.245 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 15:59:52.372 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL 
NULL NULL NULL true NULL 1969-12-31 15:59:55.249 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.661 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:00.784 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.313 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.538 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:09.986 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:11.031 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 1969-12-31 16:00:11.465 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL 2024-11-11 16:42:41.101 NULL 2000-12-18 08:42:30.0005 NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL true NULL NULL NULL 2000-12-18 08:42:30.0005 NULL NULL NULL PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -268,7 +446,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -277,9 +455,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] - selectExpressions: VectorUDFUnixTimeStampString(col 1:string) -> 3:bigint, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 7:int, VectorUDFWeekOfYearString(col 1:string) -> 8:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 9:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 10:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 11:int - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] + selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -288,7 +466,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized Map Vectorization: @@ -308,10 +486,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -353,6 +531,18 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### +-2736243926 1883 4 17 17 16 4 14 34 +-62018170411 4 9 22 22 39 18 26 29 +1365579826 2013 4 10 10 15 0 43 46 +206731024925 8521 1 16 16 3 20 42 5 +271201265 1978 8 5 5 31 14 41 5 +501208674 1985 11 18 18 47 16 37 54 +501208674 1985 11 18 18 47 16 37 54 +94573848655 4966 12 4 4 49 9 30 55 +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL @@ -433,7 +623,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -442,9 +632,9 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] - selectExpressions: LongColEqualLongColumn(col 3:bigint, col 4:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFUnixTimeStampString(col 1:string) -> 4:bigint) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 3:int, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 3:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 
4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearString(col 1:string) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 3:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 4:int) -> 11:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 3:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 4:int) -> 12:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 3:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 4:int) -> 13:boolean - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15] + selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -453,7 +643,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - 
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized Map Vectorization: @@ -473,10 +663,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -558,6 +748,18 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL +false false false false false false false false false +false false false false false false false false false +false false false false false false false false false +false false false false false false false false false PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -714,7 +916,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -723,12 +925,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [1] + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: - aggregators: VectorUDAFMinTimestamp(col 0:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 0:timestamp) -> timestamp, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFMinTimestamp(col 1:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 1:timestamp) -> timestamp, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -798,7 +1000,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: 
default@alltypesorc_string #### A masked pattern was here #### -NULL NULL 0 40 +0528-10-27 08:15:18.941718273 7160-12-02 06:00:24.81200852 8 52 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string @@ -821,7 +1023,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -830,12 +1032,12 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [1] + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctimestamp1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double + aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -903,7 +1105,7 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL +2.89160863229166E11 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, @@ -940,7 +1142,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true Select Operator @@ -949,13 +1151,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3, 6] - selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastTimestampToDouble(col 0:timestamp) -> 4:double, CastTimestampToDouble(col 0:timestamp) -> 5:double) -> 6:double - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [1, 5, 8] + selectExpressions: CastTimestampToDouble(col 1:timestamp) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastTimestampToDouble(col 1:timestamp) -> 6:double, CastTimestampToDouble(col 1:timestamp) -> 7:double) -> 8:double + Statistics: Num rows: 52 Data size: 3515 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double + aggregators: VectorUDAFSumTimestamp(col 1:timestamp) -> double, VectorUDAFCount(col 1:timestamp) -> bigint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false @@ -1037,4 +1239,4 @@ FROM alltypesorc_string POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### -NULL NULL NULL NULL NULL NULL NULL NULL +3.6145107904E10 false false false 7.5245155692476E10 7.5245155692476E10 7.5245155692476E10 8.0440455033059E10 diff --git 
storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index bebf769..f8ed7e2 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -180,6 +180,70 @@ public String stringifyColumn(int columnNum) { return b.toString(); } + private void appendVectorType(StringBuilder b, ColumnVector cv) { + String colVectorType = null; + if (cv instanceof LongColumnVector) { + colVectorType = "LONG"; + } else if (cv instanceof DoubleColumnVector) { + colVectorType = "DOUBLE"; + } else if (cv instanceof BytesColumnVector) { + colVectorType = "BYTES"; + } else if (cv instanceof DecimalColumnVector) { + colVectorType = "DECIMAL"; + } else if (cv instanceof TimestampColumnVector) { + colVectorType = "TIMESTAMP"; + } else if (cv instanceof IntervalDayTimeColumnVector) { + colVectorType = "INTERVAL_DAY_TIME"; + } else if (cv instanceof ListColumnVector) { + colVectorType = "LIST"; + } else if (cv instanceof MapColumnVector) { + colVectorType = "MAP"; + } else if (cv instanceof StructColumnVector) { + colVectorType = "STRUCT"; + } else if (cv instanceof UnionColumnVector) { + colVectorType = "UNION"; + } else { + colVectorType = "Unknown"; + } + b.append(colVectorType); + + if (cv instanceof ListColumnVector) { + ListColumnVector listColumnVector = (ListColumnVector) cv; + b.append("<"); + appendVectorType(b, listColumnVector.child); + b.append(">"); + } else if (cv instanceof MapColumnVector) { + MapColumnVector mapColumnVector = (MapColumnVector) cv; + b.append("<"); + appendVectorType(b, mapColumnVector.keys); + b.append(", "); + appendVectorType(b, mapColumnVector.values); + b.append(">"); + } else if (cv instanceof StructColumnVector) { + StructColumnVector structColumnVector = (StructColumnVector) cv; + b.append("<"); + final int fieldCount = structColumnVector.fields.length; + for (int i = 0; i < fieldCount; i++) { + if (i > 0) { + b.append(", "); + } + appendVectorType(b, structColumnVector.fields[i]); + } + b.append(">"); + } else if (cv instanceof UnionColumnVector) { + UnionColumnVector unionColumnVector = (UnionColumnVector) cv; + b.append("<"); + final int fieldCount = unionColumnVector.fields.length; + for (int i = 0; i < fieldCount; i++) { + if (i > 0) { + b.append(", "); + } + appendVectorType(b, unionColumnVector.fields[i]); + } + b.append(">"); + } + } + public String stringify(String prefix) { if (size == 0) { return ""; @@ -195,33 +259,10 @@ public String stringify(String prefix) { } b.append(projIndex); b.append(":"); - String colVectorType = null; - if (cv instanceof LongColumnVector) { - colVectorType = "LONG"; - } else if (cv instanceof DoubleColumnVector) { - colVectorType = "DOUBLE"; - } else if (cv instanceof BytesColumnVector) { - colVectorType = "BYTES"; - } else if (cv instanceof DecimalColumnVector) { - colVectorType = "DECIMAL"; - } else if (cv instanceof TimestampColumnVector) { - colVectorType = "TIMESTAMP"; - } else if (cv instanceof IntervalDayTimeColumnVector) { - colVectorType = "INTERVAL_DAY_TIME"; - } else if (cv instanceof ListColumnVector) { - colVectorType = "LIST"; - } else if (cv instanceof MapColumnVector) { - colVectorType = "MAP"; - } else if (cv instanceof StructColumnVector) { - colVectorType = "STRUCT"; - } else if (cv instanceof UnionColumnVector) { - colVectorType = "UNION"; - } else { - colVectorType = "Unknown"; - } - 
b.append(colVectorType); + appendVectorType(b, cv); } b.append('\n'); + b.append(prefix); if (this.selectedInUse) { for (int j = 0; j < size; j++) { diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java index 7bc03ed..3d9f262 100644 --- storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java +++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java @@ -119,7 +119,7 @@ public void testStringify() throws IOException { byte[] buffer = ("value " + r).getBytes(StandardCharsets.UTF_8); y.setRef(r, buffer, 0, buffer.length); } - final String EXPECTED = ("Column vector types: 0:STRUCT, 1:BYTES\n" + + final String EXPECTED = ("Column vector types: 0:STRUCT<LONG, TIMESTAMP>, 1:BYTES\n" + "[[0, 2000-01-01 00:00:01.0], \"value 0\"]\n" + "[[3, 2000-01-01 00:00:02.0], \"value 1\"]\n" + "[[6, 2000-01-01 00:00:03.0], \"value 2\"]\n" + @@ -153,7 +153,7 @@ public void testStringify2() throws IOException { byte[] buffer = ("value " + r).getBytes(StandardCharsets.UTF_8); y.setRef(r, buffer, 0, buffer.length); } - final String EXPECTED = ("Column vector types: 0:STRUCT, 1:BYTES\n" + + final String EXPECTED = ("Column vector types: 0:STRUCT<LONG, TIMESTAMP>, 1:BYTES\n" + "[[0, 2000-01-01 00:00:01], \"value 0\"]\n" + "[[3, 2000-01-01 00:00:02], \"value 1\"]\n" + "[[6, 2000-01-01 00:00:03], \"value 2\"]\n" + diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index fbb89a9..b5220a0 100644 --- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -1063,6 +1063,15 @@ {"IfExprScalarScalar", "long", "double"}, {"IfExprScalarScalar", "double", "double"}, + {"IfExprObjectColumnColumn", "timestamp"}, + {"IfExprObjectColumnColumn", "interval_day_time"}, + {"IfExprObjectColumnScalar", "timestamp"}, + {"IfExprObjectColumnScalar", "interval_day_time"}, + {"IfExprObjectScalarColumn", "timestamp"}, + {"IfExprObjectScalarColumn", "interval_day_time"}, + {"IfExprObjectScalarScalar", "timestamp"}, + {"IfExprObjectScalarScalar", "interval_day_time"}, + // template, <ClassName>, <ValueType>, <OperatorSymbol>, <DescriptionName>, <DescriptionValue> {"VectorUDAFMinMax", "VectorUDAFMinLong", "long", "<", "min", "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: long)"}, @@ -1385,6 +1394,12 @@ private void generate() throws Exception { generateIfExprScalarColumn(tdesc); } else if (tdesc[0].equals("IfExprScalarScalar")) { generateIfExprScalarScalar(tdesc); + } else if ( + tdesc[0].equals("IfExprObjectColumnColumn") || + tdesc[0].equals("IfExprObjectColumnScalar") || + tdesc[0].equals("IfExprObjectScalarColumn") || + tdesc[0].equals("IfExprObjectScalarScalar")) { + generateIfExprObject(tdesc); } else if (tdesc[0].equals("FilterDecimalColumnCompareDecimalScalar")) { generateFilterDecimalColumnCompareDecimalScalar(tdesc); } else if (tdesc[0].equals("FilterDecimalScalarCompareDecimalColumn")) { @@ -2259,6 +2274,46 @@ private void generateIfExprScalarScalar(String[] tdesc) throws Exception { className, templateString); } + private void generateIfExprObject(String [] tdesc) throws Exception { + String typeName = tdesc[1]; + String objectName; + String scalarType; + String scalarImport; + if (typeName.equals("timestamp")) { + objectName = "Timestamp"; + scalarType = "Timestamp"; + scalarImport = "java.sql.Timestamp"; + } else if 
(typeName.equals("interval_day_time")) { + objectName = "IntervalDayTime"; + scalarType = "HiveIntervalDayTime"; + scalarImport = "org.apache.hadoop.hive.common.type.HiveIntervalDayTime"; + } else { + objectName = "unknown"; + scalarType = "unknown"; + scalarImport = "unknown"; + } + String classNameSuffix = tdesc[0].substring("IfExprObject".length()); + + String writableType = getOutputWritableType(typeName); + String columnVectorType = getColumnVectorType(typeName); + + String className = "IfExpr" + objectName + classNameSuffix; + + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + + templateString = templateString.replaceAll("", className); + templateString = templateString.replaceAll("", scalarType); + templateString = templateString.replaceAll("", scalarImport); + templateString = templateString.replaceAll("", typeName); + templateString = templateString.replaceAll("", objectName); + templateString = templateString.replaceAll("", writableType); + templateString = templateString.replaceAll("", columnVectorType); + + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + // template, , , private void generateDecimalColumnUnaryFunc(String [] tdesc) throws Exception { String classNamePrefix = tdesc[1];