diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt
new file mode 100644
index 0000000..1aa398a
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.<InputColumnVectorType>;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Output a boolean value indicating if a column is [NOT] BETWEEN two constants.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int colNum;
+
+  protected final <OperandType> leftValue;
+  protected final <OperandType> rightValue;
+
+  public <ClassName>(int colNum, <OperandType> leftValue, <OperandType> rightValue, int outputColumnNum) {
+    super(outputColumnNum);
+    this.colNum = colNum;
+    this.leftValue = leftValue;
+    this.rightValue = rightValue;
+  }
+
+  public <ClassName>() {
+    super();
+
+    // Dummy final assignments.
+    colNum = -1;
+    leftValue = 0;
+    rightValue = 0;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    <InputColumnVectorType> inputColVector = (<InputColumnVectorType>) batch.cols[colNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    int n = batch.size;
+    <OperandType>[] vector = inputColVector.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        final <OperandType> repeatValue = vector[0];
+        outputVector[0] = (repeatValue < leftValue || repeatValue > rightValue) ? 0 : 1;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            final <OperandType> value = vector[i];
+            outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1;
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            final <OperandType> value = vector[i];
+            outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1;
+          }
+        }
+      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          final <OperandType> value = vector[i];
+          outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1;
+        }
+      }
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputIsNull[i] = inputIsNull[i];
+          final <OperandType> value = vector[i];
+          outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1;
+        }
+      } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          final <OperandType> value = vector[i];
+          outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1;
+        }
+      }
+    }
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return getColumnParamString(0, colNum) + ", left " + leftValue + ", right " + rightValue;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+    // return null since this will be handled as a special case in VectorizationContext
+    return null;
+  }
+}
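[Illustration only — not part of the patch. The template above is expanded by the vectorization code generator, substituting <ClassName>, <InputColumnVectorType>, and <OperandType> per type (for example LongColumnBetween / LongColumnVector / long, a class name the VectorizationContext change below refers to). A minimal standalone sketch of the generated per-row kernel, showing the inclusive bounds and the long 0/1 boolean encoding:]

    import java.util.Arrays;

    // Standalone sketch of the no-nulls, non-selected inner loop the template generates.
    public class BetweenKernelSketch {

      // Emits 1 when leftValue <= value <= rightValue, else 0 (BETWEEN is inclusive).
      static void betweenNoNulls(long[] vector, long[] outputVector, int n,
          long leftValue, long rightValue) {
        for (int i = 0; i != n; i++) {
          final long value = vector[i];
          outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1;
        }
      }

      public static void main(String[] args) {
        long[] in = {1, 5, 7, 12};
        long[] out = new long[in.length];
        betweenNoNulls(in, out, in.length, 5, 10);
        System.out.println(Arrays.toString(out)); // [0, 1, 1, 0]
      }
    }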
diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt
new file mode 100644
index 0000000..1763cbd
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+
+public class <ClassName> extends <BaseClassName> {
+  private static final long serialVersionUID = 1L;
+
+  public <ClassName>() {
+    super();
+  }
+
+  public <ClassName>(int colNum, long leftValue, long rightValue, int outputColumnNum) {
+    super(colNum, leftValue, rightValue, outputColumnNum);
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) inputTypeInfos[1];
+    HiveDecimalWritable writable1 = new HiveDecimalWritable();
+    writable1.deserialize64(leftValue, decimalTypeInfo1.scale());
+
+    DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) inputTypeInfos[2];
+    HiveDecimalWritable writable2 = new HiveDecimalWritable();
+    writable2.deserialize64(rightValue, decimalTypeInfo2.scale());
+    return
+        getColumnParamString(0, colNum) +
+        ", decimal64Left " + leftValue + ", decimalLeft " + writable1.toString() +
+        ", decimal64Right " + rightValue + ", decimalRight " + writable2.toString();
+  }
+}
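[Illustration only — not part of the patch. The decimal64 variant stores each bound as a scaled long, so the BETWEEN comparison inherited from the long base class runs on plain longs. A sketch of the round trip, using the same HiveDecimalWritable serialize64/deserialize64 methods the patch itself calls:]

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

    // Sketch of the decimal64 encoding: decimal 12.34 at scale 2 becomes the long 1234.
    public class Decimal64RoundTrip {
      public static void main(String[] args) {
        int scale = 2;
        HiveDecimal decimal = HiveDecimal.create("12.34");
        long decimal64 = new HiveDecimalWritable(decimal).serialize64(scale);
        System.out.println(decimal64); // 1234

        HiveDecimalWritable restored = new HiveDecimalWritable();
        restored.deserialize64(decimal64, scale);
        System.out.println(restored); // 12.34
      }
    }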
diff --git ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt
new file mode 100644
index 0000000..6fd1301
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil;
+
+import java.util.Arrays;
+
+/**
+ * Output a boolean value indicating if a column is [NOT] BETWEEN two constants.
+ */
+public class <ClassName> extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private final int inputColumn;
+
+  protected final HiveDecimal leftValue;
+  protected final HiveDecimal rightValue;
+
+  public <ClassName>() {
+    super();
+
+    // Dummy final assignments.
+    inputColumn = -1;
+    leftValue = null;
+    rightValue = null;
+  }
+
+  public <ClassName>(int colNum, HiveDecimal leftValue, HiveDecimal rightValue,
+      int outputColumnNum) {
+    super(outputColumnNum);
+    this.inputColumn = colNum;
+    this.leftValue = leftValue;
+    this.rightValue = rightValue;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    DecimalColumnVector inputColumnVector = (DecimalColumnVector) batch.cols[inputColumn];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColumnVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    int n = batch.size;
+    HiveDecimalWritable[] vector = inputColumnVector.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColumnVector.isRepeating) {
+      if (inputColumnVector.noNulls || !inputIsNull[0]) {
+        outputIsNull[0] = false;
+        final HiveDecimalWritable repeatValue = vector[0];
+        outputVector[0] =
+            (DecimalUtil.compare(repeatValue, leftValue) < 0 ||
+             DecimalUtil.compare(repeatValue, rightValue) > 0) ? 0 : 1;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColumnVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            final HiveDecimalWritable value = vector[i];
+            outputVector[i] =
+                (DecimalUtil.compare(value, leftValue) < 0 ||
+                 DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1;
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            final HiveDecimalWritable value = vector[i];
+            outputVector[i] =
+                (DecimalUtil.compare(value, leftValue) < 0 ||
+                 DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1;
+          }
+        }
+      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          final HiveDecimalWritable value = vector[i];
+          outputVector[i] =
+              (DecimalUtil.compare(value, leftValue) < 0 ||
+               DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1;
+        }
+      }
+    } else /* there are NULLs in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
+            final HiveDecimalWritable value = vector[i];
+            outputVector[i] =
+                (DecimalUtil.compare(value, leftValue) < 0 ||
+                 DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1;
+          }
+        }
+      } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!inputIsNull[i]) {
+            final HiveDecimalWritable value = vector[i];
+            outputVector[i] =
+                (DecimalUtil.compare(value, leftValue) < 0 ||
+                 DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1;
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public Descriptor getDescriptor() {
+
+    // This VectorExpression (BETWEEN) is a special case, so don't return a descriptor.
+    return null;
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return
+        getColumnParamString(0, inputColumn) +
+        ", left " + leftValue.toString() + ", right " + rightValue.toString();
+  }
+
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt
index 0664cbf..47dd42f 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt
@@ -34,12 +34,12 @@ public class <ClassName> extends VectorExpression {
 
   private static final long serialVersionUID = 1L;
 
-  private final int colNum;
+  protected final int colNum;
 
   // The comparison is of the form "column BETWEEN leftValue AND rightValue".
   // NOTE: These can be set later by FilterColumnBetweenDynamicValue.txt so they are not final.
-  private <OperandType> leftValue;
-  private <OperandType> rightValue;
+  protected <OperandType> leftValue;
+  protected <OperandType> rightValue;
 
   public <ClassName>(int colNum, <OperandType> leftValue, <OperandType> rightValue) {
     super();
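[Illustration only — not part of the patch. DecimalColumnBetween's per-row test is just two compares; a sketch of the equivalent check with HiveDecimal.compareTo (DecimalUtil.compare in the template plays the same role against a HiveDecimalWritable):]

    import org.apache.hadoop.hive.common.type.HiveDecimal;

    // Sketch of the inclusive decimal range test performed per row.
    public class DecimalBetweenSketch {
      public static void main(String[] args) {
        HiveDecimal left = HiveDecimal.create("1.00");
        HiveDecimal right = HiveDecimal.create("2.00");
        HiveDecimal value = HiveDecimal.create("1.50");
        boolean between = value.compareTo(left) >= 0 && value.compareTo(right) <= 0;
        System.out.println(between); // true
      }
    }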
diff --git ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt
new file mode 100644
index 0000000..798cb95
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+import java.util.Arrays;
+
+public class <ClassName> extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  protected final int inputCol;
+
+  private final byte[] left;
+  private final byte[] right;
+
+  public <ClassName>() {
+    super();
+
+    // Dummy final assignments.
+    inputCol = -1;
+    left = null;
+    right = null;
+  }
+
+  public <ClassName>(int colNum, byte[] left, byte[] right, int outputColumnNum) {
+    super(outputColumnNum);
+    this.inputCol = colNum;
+    this.left = left;
+    this.right = right;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector.isNull;
+    int n = batch.size;
+    byte[][] vector = inputColVector.vector;
+    int[] start = inputColVector.start;
+    int[] length = inputColVector.length;
+    long[] outputVector = outputColVector.vector;
+    boolean[] outputIsNull = outputColVector.isNull;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        final byte[] repeatBytes = vector[0];
+        final int repeatStart = start[0];
+        final int repeatLength = length[0];
+        outputVector[0] =
+            (StringExpr.compare(repeatBytes, repeatStart, repeatLength, left, 0, left.length) < 0 ||
+             StringExpr.compare(right, 0, right.length, repeatBytes, repeatStart, repeatLength) < 0) ? 0 : 1;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            final byte[] valueBytes = vector[i];
+            final int valueStart = start[i];
+            final int valueLength = length[i];
+            outputVector[i] =
+                (StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 ||
+                 StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1;
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            final byte[] valueBytes = vector[i];
+            final int valueStart = start[i];
+            final int valueLength = length[i];
+            outputVector[i] =
+                (StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 ||
+                 StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1;
+          }
+        }
+      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          final byte[] valueBytes = vector[i];
+          final int valueStart = start[i];
+          final int valueLength = length[i];
+          outputVector[i] =
+              (StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 ||
+               StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1;
+        }
+      }
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.isNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
+            final byte[] valueBytes = vector[i];
+            final int valueStart = start[i];
+            final int valueLength = length[i];
+            outputVector[i] =
+                (StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 ||
+                 StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1;
+          }
+        }
+      } else {
+        System.arraycopy(inputIsNull, 0, outputColVector.isNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!inputIsNull[i]) {
+            final byte[] valueBytes = vector[i];
+            final int valueStart = start[i];
+            final int valueLength = length[i];
+            outputVector[i] =
+                (StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 ||
+                 StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1;
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public Descriptor getDescriptor() {
+
+    // This VectorExpression (BETWEEN) is a special case, so don't return a descriptor.
+    return null;
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return
+        getColumnParamString(0, inputCol) +
+        ", left " + displayUtf8Bytes(left) + ", right " + displayUtf8Bytes(right);
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt
new file mode 100644
index 0000000..db42577
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.sql.Timestamp;
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+/**
+ * Output a boolean value indicating if a column is [NOT] BETWEEN two constants.
+ */
+public class <ClassName> extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+
+  private final int inputCol;
+
+  private final Timestamp leftValue;
+  private final Timestamp rightValue;
+
+  public <ClassName>() {
+    super();
+
+    // Dummy final assignments.
+    inputCol = -1;
+    leftValue = null;
+    rightValue = null;
+  }
+
+  public <ClassName>(int colNum, Timestamp leftValue, Timestamp rightValue, int outputColumnNum) {
+    super(outputColumnNum);
+    this.inputCol = colNum;
+    this.leftValue = leftValue;
+    this.rightValue = rightValue;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) throws HiveException {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    int n = batch.size;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    // We do not need to do a column reset since we are carefully changing the output.
+    outputColVector.isRepeating = false;
+
+    if (inputColVector.isRepeating) {
+      if (inputColVector.noNulls || !inputIsNull[0]) {
+        // Set isNull before call in case it changes its mind.
+        outputIsNull[0] = false;
+        outputVector[0] =
+            (inputColVector.compareTo(0, leftValue) < 0 ||
+             inputColVector.compareTo(0, rightValue) > 0) ? 0 : 1;
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+      }
+      outputColVector.isRepeating = true;
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+
+        // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+        if (!outputColVector.noNulls) {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            // Set isNull before call in case it changes its mind.
+            outputIsNull[i] = false;
+            outputVector[i] =
+                (inputColVector.compareTo(i, leftValue) < 0 ||
+                 inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1;
+          }
+        } else {
+          for(int j = 0; j != n; j++) {
+            final int i = sel[j];
+            outputVector[i] =
+                (inputColVector.compareTo(i, leftValue) < 0 ||
+                 inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1;
+          }
+        }
+      } else {
+        if (!outputColVector.noNulls) {
+
+          // Assume it is almost always a performance win to fill all of isNull so we can
+          // safely reset noNulls.
+          Arrays.fill(outputIsNull, false);
+          outputColVector.noNulls = true;
+        }
+        for(int i = 0; i != n; i++) {
+          outputVector[i] =
+              (inputColVector.compareTo(i, leftValue) < 0 ||
+               inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1;
+        }
+      }
+    } else /* there are nulls in the inputColVector */ {
+
+      // Carefully handle NULLs...
+      outputColVector.noNulls = false;
+
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputIsNull[i] = inputIsNull[i];
+          if (!inputIsNull[i]) {
+            outputVector[i] =
+                (inputColVector.compareTo(i, leftValue) < 0 ||
+                 inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1;
+          }
+        }
+      } else {
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!inputIsNull[i]) {
+            outputVector[i] =
+                (inputColVector.compareTo(i, leftValue) < 0 ||
+                 inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1;
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public Descriptor getDescriptor() {
+
+    // This VectorExpression (BETWEEN) is a special case, so don't return a descriptor.
+    return null;
+  }
+
+  @Override
+  public String vectorExpressionParameters() {
+    return
+        getColumnParamString(0, inputCol) +
+        ", left " + leftValue.toString() + ", right " + rightValue.toString();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt
new file mode 100644
index 0000000..4ab8440
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+public class <ClassName> extends <BaseClassName> {
+  private static final long serialVersionUID = 1L;
+
+  public <ClassName>() {
+    super();
+  }
+
+  public <ClassName>(int colNum, byte[] left, byte[] right, int outputColumnNum) {
+    super(colNum, left, right, outputColumnNum);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
index f5f4d72..e1482e0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
@@ -183,12 +183,19 @@ public int getCount() {
    * @param logicalColumnIndex
    * @return
    */
-  private Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) {
+  private Object extractRowColumn(VectorizedRowBatch batch, int batchIndex,
+      int logicalColumnIndex) {
 
     final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
     final ColumnVector colVector = batch.cols[projectionColumnNum];
-    return extractRowColumn(
-        colVector, typeInfos[logicalColumnIndex], objectInspectors[logicalColumnIndex], batchIndex);
+    final TypeInfo typeInfo = typeInfos[logicalColumnIndex];
+    // try {
+      return extractRowColumn(
+          colVector, typeInfo, objectInspectors[logicalColumnIndex], batchIndex);
+    // } catch (Exception e){
+    //   throw new RuntimeException("Error evaluating column number " + projectionColumnNum +
+    //       ", typeInfo " + typeInfo.toString() + ", batchIndex " + batchIndex);
+    // }
   }
 
   public Object extractRowColumn(
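[Illustration only — not part of the patch. StringColumnBetween compares raw UTF-8 bytes, so the range test is lexicographic on unsigned byte values. A self-contained sketch mirroring what StringExpr.compare does; the helper below is a simplified stand-in, not the Hive implementation:]

    import java.nio.charset.StandardCharsets;

    // Plain-Java sketch of the byte-wise comparison the string BETWEEN relies on.
    public class StringBetweenSketch {
      // Compares byte arrays as unsigned values, shorter prefix sorts first.
      static int compare(byte[] a, byte[] b) {
        int len = Math.min(a.length, b.length);
        for (int i = 0; i < len; i++) {
          int cmp = (a[i] & 0xff) - (b[i] & 0xff);
          if (cmp != 0) {
            return cmp;
          }
        }
        return a.length - b.length;
      }

      public static void main(String[] args) {
        byte[] left = "apple".getBytes(StandardCharsets.UTF_8);
        byte[] right = "mango".getBytes(StandardCharsets.UTF_8);
        byte[] value = "cherry".getBytes(StandardCharsets.UTF_8);
        boolean between = compare(value, left) >= 0 && compare(value, right) <= 0;
        System.out.println(between); // true
      }
    }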
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index e541217..97e4059 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1381,32 +1381,7 @@ private VectorExpression getConstantVectorExpression(Object constantValue, TypeI
       }
     }
 
-    switch (vectorArgType) {
-    case INT_FAMILY:
-      return new ConstantVectorExpression(outCol, ((Number) constantValue).longValue(), typeInfo);
-    case DATE:
-      return new ConstantVectorExpression(outCol, DateWritableV2.dateToDays((Date) constantValue), typeInfo);
-    case TIMESTAMP:
-      return new ConstantVectorExpression(outCol,
-          ((org.apache.hadoop.hive.common.type.Timestamp) constantValue).toSqlTimestamp(), typeInfo);
-    case INTERVAL_YEAR_MONTH:
-      return new ConstantVectorExpression(outCol,
-          ((HiveIntervalYearMonth) constantValue).getTotalMonths(), typeInfo);
-    case INTERVAL_DAY_TIME:
-      return new ConstantVectorExpression(outCol, (HiveIntervalDayTime) constantValue, typeInfo);
-    case FLOAT_FAMILY:
-      return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue(), typeInfo);
-    case DECIMAL:
-      return new ConstantVectorExpression(outCol, (HiveDecimal) constantValue, typeInfo);
-    case STRING:
-      return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes(), typeInfo);
-    case CHAR:
-      return new ConstantVectorExpression(outCol, ((HiveChar) constantValue), typeInfo);
-    case VARCHAR:
-      return new ConstantVectorExpression(outCol, ((HiveVarchar) constantValue), typeInfo);
-    default:
-      throw new HiveException("Unsupported constant type: " + typeName + ", object class " + constantValue.getClass().getSimpleName());
-    }
+    return ConstantVectorExpression.create(outCol, constantValue, typeInfo);
   }
 
   private VectorExpression getDynamicValueVectorExpression(ExprNodeDynamicValueDesc dynamicValueExpr,
@@ -1431,33 +1406,30 @@ private VectorExpression getDynamicValueVectorExpression(ExprNodeDynamicValueDes
    */
   private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList)
       throws HiveException {
+
+    if (childExprList.size() != 1) {
+      return null;
+    }
     ExprNodeDesc childExpr = childExprList.get(0);
+
+    if (!(childExpr instanceof ExprNodeColumnDesc)) {
+
+      // Some vector operators like VectorSelectOperator optimize IdentityExpression out of
+      // their vector expression list and don't evaluate the children, so just return the
+      // child expression here instead of IdentityExpression.
+      return getVectorExpression(childExpr);
+    }
+
     int identityCol;
     TypeInfo identityTypeInfo;
     DataTypePhysicalVariation identityDataTypePhysicalVariation;
 
-    VectorExpression v1 = null;
-    if (childExpr instanceof ExprNodeGenericFuncDesc) {
-      v1 = getVectorExpression(childExpr);
-      identityCol = v1.getOutputColumnNum();
-      identityTypeInfo = v1.getOutputTypeInfo();
-      identityDataTypePhysicalVariation = v1.getOutputDataTypePhysicalVariation();
-    } else if (childExpr instanceof ExprNodeColumnDesc) {
-      ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
-      identityCol = getInputColumnIndex(colDesc.getColumn());
-      identityTypeInfo = colDesc.getTypeInfo();
-
-      // CONSIDER: Validation of type information
-
-      identityDataTypePhysicalVariation = getDataTypePhysicalVariation(identityCol);
-    } else {
-      throw new HiveException("Expression not supported: "+childExpr);
-    }
-
-    VectorExpression ve = new IdentityExpression(identityCol);
+    ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
+    identityCol = getInputColumnIndex(colDesc.getColumn());
+    identityTypeInfo = colDesc.getTypeInfo();
 
-    if (v1 != null) {
-      ve.setChildExpressions(new VectorExpression [] {v1});
-    }
+    identityDataTypePhysicalVariation = getDataTypePhysicalVariation(identityCol);
+
+    VectorExpression ve = new IdentityExpression(identityCol);
 
     ve.setInputTypeInfos(identityTypeInfo);
     ve.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation);
@@ -1468,7 +1440,6 @@ private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList)
     return ve;
   }
 
-
   private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws HiveException {
     if (exprNodeDesc instanceof ExprNodeColumnDesc) {
       int colIndex = getInputColumnIndex((ExprNodeColumnDesc) exprNodeDesc);
@@ -1626,6 +1597,20 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd
 
     VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);
 
+    return createDecimal64VectorExpression(
+        vectorClass, childExprs, childrenMode,
+        isDecimal64ScaleEstablished, decimal64ColumnScale,
+        returnTypeInfo, returnDataTypePhysicalVariation);
+  }
+
+  private VectorExpression createDecimal64VectorExpression(Class<?> vectorClass,
+      List<ExprNodeDesc> childExprs, VectorExpressionDescriptor.Mode childrenMode,
+      boolean isDecimal64ScaleEstablished, int decimal64ColumnScale,
+      TypeInfo returnTypeInfo, DataTypePhysicalVariation returnDataTypePhysicalVariation)
+      throws HiveException {
+
+    final int numChildren = childExprs.size();
+
     /*
      * Custom build arguments.
      */
@@ -1659,8 +1644,7 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd
           children.add(filterExpr);
         }
         arguments[i] = colIndex;
-      } else {
-        Preconditions.checkState(childExpr instanceof ExprNodeConstantDesc);
+      } else if (childExpr instanceof ExprNodeConstantDesc) {
         ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childExpr;
         if (typeInfo instanceof DecimalTypeInfo) {
           if (!isDecimal64ScaleEstablished) {
@@ -1681,6 +1665,8 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd
             (scalarValue == null) ?
                 getConstantVectorExpression(null, typeInfo, childrenMode) : scalarValue;
       }
+      } else {
+        return null;
     }
   }
@@ -2040,8 +2026,28 @@ public VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnTy
     return ve;
   }
 
+  // Handle strange case of TO_DATE(date) or CAST(date AS DATE)
+  private VectorExpression getIdentityForDateToDate(List<ExprNodeDesc> childExprs,
+      TypeInfo returnTypeInfo)
+      throws HiveException {
+    if (childExprs.size() != 1) {
+      return null;
+    }
+    TypeInfo childTypeInfo = childExprs.get(0).getTypeInfo();
+    if (childTypeInfo.getCategory() != Category.PRIMITIVE ||
+        ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory() != PrimitiveCategory.DATE) {
+      return null;
+    }
+    if (returnTypeInfo.getCategory() != Category.PRIMITIVE ||
+        ((PrimitiveTypeInfo) returnTypeInfo).getPrimitiveCategory() != PrimitiveCategory.DATE) {
+      return null;
+    }
+    return getIdentityExpression(childExprs);
+  }
+
   private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
-      List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
+      List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType)
+      throws HiveException {
 
     List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr);
     childExpr = castedChildren;
@@ -2049,8 +2055,8 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
     //First handle special cases. If one of the special case methods cannot handle it,
     // it returns null.
     VectorExpression ve = null;
-    if (udf instanceof GenericUDFBetween && mode == VectorExpressionDescriptor.Mode.FILTER) {
-      ve = getBetweenFilterExpression(childExpr, mode, returnType);
+    if (udf instanceof GenericUDFBetween) {
+      ve = getBetweenExpression(childExpr, mode, returnType);
     } else if (udf instanceof GenericUDFIn) {
      ve = getInExpression(childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFIf) {
@@ -2083,6 +2089,8 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
       ve = getCastToBinary(childExpr, returnType);
     } else if (udf instanceof GenericUDFTimestamp) {
       ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType);
+    } else if (udf instanceof GenericUDFDate || udf instanceof GenericUDFToDate) {
+      ve = getIdentityForDateToDate(childExpr, returnType);
     }
     if (ve != null) {
       return ve;
@@ -2444,14 +2452,42 @@ private VectorExpression getInExpression(List<ExprNodeDesc> childExpr,
       expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
       ((IDoubleInExpr) expr).setInListValues(inValsD);
     } else if (isDecimalFamily(colType)) {
-      cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDecimalColumnInList.class : DecimalColumnInList.class);
-      HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()];
-      for (int i = 0; i != inValsD.length; i++) {
-        inValsD[i] = (HiveDecimal) getVectorTypeScalarValue(
-            (ExprNodeConstantDesc) childrenForInList.get(i));
+
+      final boolean tryDecimal64 =
+          checkExprNodeDescForDecimal64(colExpr);
+      if (tryDecimal64) {
+        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ?
+            FilterDecimal64ColumnInList.class : Decimal64ColumnInList.class);
+        final int scale = ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale();
+        expr = createDecimal64VectorExpression(
+            cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION,
+            /* isDecimal64ScaleEstablished */ true,
+            /* decimal64ColumnScale */ scale,
+            returnType, DataTypePhysicalVariation.NONE);
+        if (expr != null) {
+          long[] inVals = new long[childrenForInList.size()];
+          for (int i = 0; i != inVals.length; i++) {
+            ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childrenForInList.get(i);
+            HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue();
+            final long decimal64Scalar =
+                new HiveDecimalWritable(hiveDecimal).serialize64(scale);
+            inVals[i] = decimal64Scalar;
+          }
+          ((ILongInExpr) expr).setInListValues(inVals);
+        }
+      }
+      if (expr == null) {
+        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ?
+            FilterDecimalColumnInList.class : DecimalColumnInList.class);
+        expr = createVectorExpression(
+            cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+        HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()];
+        for (int i = 0; i != inValsD.length; i++) {
+          inValsD[i] = (HiveDecimal) getVectorTypeScalarValue(
+              (ExprNodeConstantDesc) childrenForInList.get(i));
+        }
+        ((IDecimalInExpr) expr).setInListValues(inValsD);
       }
-      expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
-      ((IDecimalInExpr) expr).setInListValues(inValsD);
     } else if (isDateFamily(colType)) {
       cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
       long[] inVals = new long[childrenForInList.size()];
@@ -2973,21 +3009,32 @@ private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr, P
     return null;
   }
 
-  /* Get a [NOT] BETWEEN filter expression. This is treated as a special case
+  private VectorExpression tryDecimal64Between(VectorExpressionDescriptor.Mode mode, boolean isNot,
+      ExprNodeDesc colExpr, List<ExprNodeDesc> childrenAfterNot, TypeInfo returnTypeInfo)
+      throws HiveException {
+    final Class<?> cl;
+    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+      cl = (isNot ? Decimal64ColumnNotBetween.class : Decimal64ColumnBetween.class);
+    } else {
+      cl = (isNot ? FilterDecimal64ColumnNotBetween.class : FilterDecimal64ColumnBetween.class);
+    }
+    return
+        createDecimal64VectorExpression(
+            cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION,
+            /* isDecimal64ScaleEstablished */ true,
+            /* decimal64ColumnScale */ ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale(),
+            returnTypeInfo, DataTypePhysicalVariation.NONE);
+  }
+
+  /* Get a [NOT] BETWEEN filter or projection expression. This is treated as a special case
    * because the NOT is actually specified in the expression tree as the first argument,
    * and we don't want any runtime cost for that. So creating the VectorExpression
    * needs to be done differently than the standard way where all arguments are
    * passed to the VectorExpression constructor.
    */
-  private VectorExpression getBetweenFilterExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType)
-      throws HiveException {
-
-    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
-
-      // Projection mode is not yet supported for [NOT] BETWEEN. Return null so Vectorizer
-      // knows to revert to row-at-a-time execution.
-      return null;
-    }
+  private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr,
+      VectorExpressionDescriptor.Mode mode, TypeInfo returnType)
+      throws HiveException {
 
     boolean hasDynamicValues = false;
 
@@ -2995,6 +3042,11 @@ private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr,
     if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) &&
         (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) {
       hasDynamicValues = true;
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+
+        // Projection mode is not applicable.
+        return null;
+      }
     } else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) ||
         !(childExpr.get(3) instanceof ExprNodeConstantDesc)) {
       return null;
    }
@@ -3021,7 +3073,7 @@ private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr,
     }
 
     List<ExprNodeDesc> castChildren = new ArrayList<ExprNodeDesc>();
-
+    boolean wereCastUdfs = false;
     for (ExprNodeDesc desc: childExpr.subList(1, 4)) {
       if (commonType.equals(desc.getTypeInfo())) {
         castChildren.add(desc);
@@ -3030,6 +3082,7 @@ private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr,
         ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf,
             Arrays.asList(new ExprNodeDesc[] { desc }));
         castChildren.add(engfd);
+        wereCastUdfs = true;
       }
     }
     String colType = commonType.getTypeName();
@@ -3040,55 +3093,141 @@ private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr,
 
     // determine class
     Class<?> cl = null;
     if (isIntFamily(colType) && !notKeywordPresent) {
-      cl = (hasDynamicValues ?
-          FilterLongColumnBetweenDynamicValue.class :
-          FilterLongColumnBetween.class);
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = LongColumnBetween.class;
+      } else {
+        cl = (hasDynamicValues ?
+            FilterLongColumnBetweenDynamicValue.class :
+            FilterLongColumnBetween.class);
+      }
     } else if (isIntFamily(colType) && notKeywordPresent) {
-      cl = FilterLongColumnNotBetween.class;
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = LongColumnNotBetween.class;
+      } else {
+        cl = FilterLongColumnNotBetween.class;
+      }
     } else if (isFloatFamily(colType) && !notKeywordPresent) {
-      cl = (hasDynamicValues ?
-          FilterDoubleColumnBetweenDynamicValue.class :
-          FilterDoubleColumnBetween.class);
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = DoubleColumnBetween.class;
+      } else {
+        cl = (hasDynamicValues ?
+            FilterDoubleColumnBetweenDynamicValue.class :
+            FilterDoubleColumnBetween.class);
+      }
     } else if (isFloatFamily(colType) && notKeywordPresent) {
-      cl = FilterDoubleColumnNotBetween.class;
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = DoubleColumnNotBetween.class;
+      } else {
+        cl = FilterDoubleColumnNotBetween.class;
+      }
     } else if (colType.equals("string") && !notKeywordPresent) {
-      cl = (hasDynamicValues ?
-          FilterStringColumnBetweenDynamicValue.class :
-          FilterStringColumnBetween.class);
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = StringColumnBetween.class;
+      } else {
+        cl = (hasDynamicValues ?
+            FilterStringColumnBetweenDynamicValue.class :
+            FilterStringColumnBetween.class);
+      }
     } else if (colType.equals("string") && notKeywordPresent) {
-      cl = FilterStringColumnNotBetween.class;
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = StringColumnNotBetween.class;
+      } else {
+        cl = FilterStringColumnNotBetween.class;
+      }
     } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) {
-      cl = (hasDynamicValues ?
-          FilterVarCharColumnBetweenDynamicValue.class :
-          FilterVarCharColumnBetween.class);
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = VarCharColumnBetween.class;
+      } else {
+        cl = (hasDynamicValues ?
+            FilterVarCharColumnBetweenDynamicValue.class :
+            FilterVarCharColumnBetween.class);
+      }
     } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) {
-      cl = FilterVarCharColumnNotBetween.class;
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = VarCharColumnNotBetween.class;
+      } else {
+        cl = FilterVarCharColumnNotBetween.class;
+      }
     } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) {
-      cl = (hasDynamicValues ?
-          FilterCharColumnBetweenDynamicValue.class :
-          FilterCharColumnBetween.class);
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = CharColumnBetween.class;
+      } else {
+        cl = (hasDynamicValues ?
+            FilterCharColumnBetweenDynamicValue.class :
+            FilterCharColumnBetween.class);
+      }
     } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) {
-      cl = FilterCharColumnNotBetween.class;
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = CharColumnNotBetween.class;
+      } else {
+        cl = FilterCharColumnNotBetween.class;
+      }
     } else if (colType.equals("timestamp") && !notKeywordPresent) {
-      cl = (hasDynamicValues ?
-          FilterTimestampColumnBetweenDynamicValue.class :
-          FilterTimestampColumnBetween.class);
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = TimestampColumnBetween.class;
+      } else {
+        cl = (hasDynamicValues ?
+            FilterTimestampColumnBetweenDynamicValue.class :
+            FilterTimestampColumnBetween.class);
+      }
     } else if (colType.equals("timestamp") && notKeywordPresent) {
-      cl = FilterTimestampColumnNotBetween.class;
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = TimestampColumnNotBetween.class;
+      } else {
+        cl = FilterTimestampColumnNotBetween.class;
+      }
     } else if (isDecimalFamily(colType) && !notKeywordPresent) {
-      cl = (hasDynamicValues ?
-          FilterDecimalColumnBetweenDynamicValue.class :
-          FilterDecimalColumnBetween.class);
+      final boolean tryDecimal64 =
+          checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
+      if (tryDecimal64) {
+        VectorExpression decimal64VecExpr =
+            tryDecimal64Between(
+                mode, /* isNot */ false, colExpr, childrenAfterNot,
+                returnType);
+        if (decimal64VecExpr != null) {
+          return decimal64VecExpr;
+        }
+      }
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = DecimalColumnBetween.class;
+      } else {
+        cl = (hasDynamicValues ?
+            FilterDecimalColumnBetweenDynamicValue.class :
+            FilterDecimalColumnBetween.class);
+      }
     } else if (isDecimalFamily(colType) && notKeywordPresent) {
-      cl = FilterDecimalColumnNotBetween.class;
+      final boolean tryDecimal64 =
+          checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
+      if (tryDecimal64) {
+        VectorExpression decimal64VecExpr =
+            tryDecimal64Between(
+                mode, /* isNot */ true, colExpr, childrenAfterNot, returnType);
+        if (decimal64VecExpr != null) {
+          return decimal64VecExpr;
+        }
+      }
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = DecimalColumnNotBetween.class;
+      } else {
+        cl = FilterDecimalColumnNotBetween.class;
+      }
     } else if (isDateFamily(colType) && !notKeywordPresent) {
-      cl = (hasDynamicValues ?
-          FilterDateColumnBetweenDynamicValue.class :
-          FilterLongColumnBetween.class);
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = LongColumnBetween.class;
+      } else {
+        cl = (hasDynamicValues ?
+            FilterDateColumnBetweenDynamicValue.class :
+            FilterLongColumnBetween.class);
+      }
     } else if (isDateFamily(colType) && notKeywordPresent) {
-      cl = FilterLongColumnNotBetween.class;
+      if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
+        cl = LongColumnNotBetween.class;
+      } else {
+        cl = FilterLongColumnNotBetween.class;
+      }
     }
-    return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
+    return createVectorExpression(
+        cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
   }
 
   private boolean isCondExpr(ExprNodeDesc exprNodeDesc) {
@@ -3379,11 +3518,12 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Ve
         argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
       } else if (child instanceof ExprNodeConstantDesc) {
         // this is a constant (or null)
-        if (child.getTypeInfo().getCategory() != Category.PRIMITIVE) {
+        if (child.getTypeInfo().getCategory() != Category.PRIMITIVE &&
+            child.getTypeInfo().getCategory() != Category.STRUCT) {
 
           // Complex type constants currently not supported by VectorUDFArgDesc.prepareConstant.
           throw new HiveException(
-              "Unable to vectorize custom UDF. Complex type constants not supported: " + child);
+              "Unable to vectorize custom UDF. LIST, MAP, and UNION type constants not supported: " + child);
         }
         argDescs[i].setConstant((ExprNodeConstantDesc) child);
       } else if (child instanceof ExprNodeDynamicValueDesc) {
@@ -3523,7 +3663,11 @@ private Object getScalarValue(ExprNodeConstantDesc constDesc)
   private long getIntFamilyScalarAsLong(ExprNodeConstantDesc constDesc)
       throws HiveException {
     Object o = getScalarValue(constDesc);
-    if (o instanceof Integer) {
+    if (o instanceof Byte) {
+      return (Byte) o;
+    } else if (o instanceof Short) {
+      return (Short) o;
+    } else if (o instanceof Integer) {
       return (Integer) o;
     } else if (o instanceof Long) {
       return (Long) o;
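[Illustration only — not part of the patch. The next file deletion is enabled by the getIdentityForDateToDate() method added above: TO_DATE(date) / CAST(date AS DATE) now becomes an identity expression. Vectorized DATE values are stored as epoch days in a LongColumnVector (cf. DateWritableV2.dateToDays in the deleted switch), so there is nothing to convert. A sketch of that representation:]

    import java.time.LocalDate;

    // Sketch: a DATE column holds days since 1970-01-01, so DATE -> DATE is a no-op.
    public class DateIdentitySketch {
      public static void main(String[] args) {
        long days = LocalDate.of(2018, 7, 1).toEpochDay();
        System.out.println(days); // 17713
        System.out.println(LocalDate.ofEpochDay(days)); // 2018-07-01
      }
    }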
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
deleted file mode 100644
index f99bd69..0000000
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-
-import java.sql.Date;
-
-/**
- * Casts a timestamp and date vector to a date vector.
- */
-public class CastLongToDate extends VectorExpression {
-  private static final long serialVersionUID = 1L;
-
-  private int inputColumn;
-  private transient Date date = new Date(0);
-
-  public CastLongToDate() {
-    super();
-  }
-
-  public CastLongToDate(int inputColumn, int outputColumnNum) {
-    super(outputColumnNum);
-    this.inputColumn = inputColumn;
-  }
-
-  @Override
-  public void evaluate(VectorizedRowBatch batch) throws HiveException {
-
-    if (childExpressions != null) {
-      super.evaluateChildren(batch);
-    }
-
-    LongColumnVector inV = (LongColumnVector) batch.cols[inputColumn];
-    int[] sel = batch.selected;
-    int n = batch.size;
-    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
-
-    if (n == 0) {
-
-      // Nothing to do
-      return;
-    }
-
-    PrimitiveCategory primitiveCategory =
-        ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory();
-    switch (primitiveCategory) {
-    case DATE:
-      inV.copySelected(batch.selectedInUse, batch.selected, batch.size, outV);
-      break;
-    default:
-      throw new Error("Unsupported input type " + primitiveCategory.name());
-    }
-  }
-
-  @Override
-  public String vectorExpressionParameters() {
-    return getColumnParamString(0, inputColumn);
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
-    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
-        .setNumArguments(1)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.DATE)
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.COLUMN);
-    return b.build();
-  }
-}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
index 8ae8a54..25bc84a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
@@ -20,14 +20,23 @@
 
 import java.nio.charset.StandardCharsets;
 import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
+import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -45,6 +54,7 @@
   private HiveDecimal decimalValue = null;
   private Timestamp timestampValue = null;
   private HiveIntervalDayTime intervalDayTimeValue = null;
+  private ConstantVectorExpression[] structValue;
   private boolean isNullValue = false;
 
   private final ColumnVector.Type type;
@@ -122,15 +132,133 @@ public ConstantVectorExpression(int outputColumnNum, TypeInfo outputTypeInfo, bo
   }
 
   /*
+  public static VectorExpression createList(int outputColumnNum, Object value, TypeInfo outputTypeInfo)
+      throws HiveException {
+    ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo);
+    result.setListValue(value);
+    return result;
+  }
+
+  public static VectorExpression createMap(int outputColumnNum, Object value, TypeInfo outputTypeInfo)
+      throws HiveException {
+    ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo);
+    result.setMapValue(value);
+    return result;
+  }
+  */
+
+  public static ConstantVectorExpression createStruct(int outputColumnNum, Object value,
+      TypeInfo outputTypeInfo)
+      throws HiveException {
+    ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo);
+    result.setStructValue(value);
+    return result;
+  }
+
+  /*
+  public static VectorExpression createUnion(int outputColumnNum, Object value, TypeInfo outputTypeInfo)
+      throws HiveException {
+    ConstantVectorExpression result = new ConstantVectorExpression(outputColumnNum, outputTypeInfo);
+    result.setUnionValue(value);
+    return result;
+  }
+  */
+
+  public static ConstantVectorExpression create(int outputColumnNum, Object constantValue, TypeInfo outputTypeInfo)
+      throws HiveException {
+
+    if (constantValue == null) {
+      return new ConstantVectorExpression(outputColumnNum, outputTypeInfo, true);
+    }
+
+    Category category = outputTypeInfo.getCategory();
+    switch (category) {
+    case PRIMITIVE:
+      {
+        PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) outputTypeInfo;
+        PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+        switch (primitiveCategory) {
+        case BOOLEAN:
+          if (((Boolean) constantValue).booleanValue()) {
+            return new ConstantVectorExpression(outputColumnNum, 1, outputTypeInfo);
+          } else {
+            return new ConstantVectorExpression(outputColumnNum, 0, outputTypeInfo);
+          }
+        case BYTE:
+        case SHORT:
+        case INT:
+        case LONG:
+          return new ConstantVectorExpression(
+              outputColumnNum, ((Number) constantValue).longValue(), outputTypeInfo);
+        case FLOAT:
+        case DOUBLE:
+          return new ConstantVectorExpression(
+              outputColumnNum, ((Number) constantValue).doubleValue(), outputTypeInfo);
+        case DATE:
+          return new ConstantVectorExpression(
+              outputColumnNum, DateWritableV2.dateToDays((Date) constantValue), outputTypeInfo);
+        case TIMESTAMP:
+          return new ConstantVectorExpression(
+              outputColumnNum,
+              ((org.apache.hadoop.hive.common.type.Timestamp) constantValue).toSqlTimestamp(),
+              outputTypeInfo);
+        case DECIMAL:
+          return new ConstantVectorExpression(
+              outputColumnNum, (HiveDecimal) constantValue, outputTypeInfo);
+        case STRING:
+          return new ConstantVectorExpression(
+              outputColumnNum, ((String) constantValue).getBytes(), outputTypeInfo);
+        case VARCHAR:
+          return new ConstantVectorExpression(
+              outputColumnNum, ((HiveVarchar) constantValue), outputTypeInfo);
+        case CHAR:
+          return new ConstantVectorExpression(
+              outputColumnNum, ((HiveChar) constantValue), outputTypeInfo);
+        case INTERVAL_YEAR_MONTH:
+          return new ConstantVectorExpression(
+              outputColumnNum,
+              ((HiveIntervalYearMonth) constantValue).getTotalMonths(),
+              outputTypeInfo);
+        case INTERVAL_DAY_TIME:
+          return new ConstantVectorExpression(
+              outputColumnNum,
+              (HiveIntervalDayTime) constantValue,
+              outputTypeInfo);
+        case VOID:
+        case BINARY:
+        case TIMESTAMPLOCALTZ:
+        case UNKNOWN:
+        default:
+          throw new RuntimeException("Unexpected primitive category " + primitiveCategory);
+        }
+      }
+    // case LIST:
+    //   return ConstantVectorExpression.createList(
+    //       outputColumnNum, constantValue, outputTypeInfo);
+    // case MAP:
+    //   return ConstantVectorExpression.createMap(
+    //       outputColumnNum, constantValue, outputTypeInfo);
+    case STRUCT:
+      return ConstantVectorExpression.createStruct(
+          outputColumnNum, constantValue, outputTypeInfo);
+    // case UNION:
+    //   return ConstantVectorExpression.createUnion(
+    //       outputColumnNum, constantValue, outputTypeInfo);
+    default:
+      throw new RuntimeException("Unexpected category " + category);
+    }
+  }
+
+  /*
    * In the following evaluate* methods, since we are supporting scratch column reuse, we must
    * assume the column may have noNulls of false and some isNull entries true.
    *
    * So, do a proper assignments.
    */
-  private void evaluateLong(VectorizedRowBatch vrg) {
+  private void evaluateLong(ColumnVector colVector) {
 
-    LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum];
+    LongColumnVector cv = (LongColumnVector) colVector;
     cv.isRepeating = true;
     if (!isNullValue) {
       cv.isNull[0] = false;
@@ -141,8 +269,8 @@ private void evaluateLong(VectorizedRowBatch vrg) {
     }
   }
 
-  private void evaluateDouble(VectorizedRowBatch vrg) {
-    DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum];
+  private void evaluateDouble(ColumnVector colVector) {
+    DoubleColumnVector cv = (DoubleColumnVector) colVector;
     cv.isRepeating = true;
     if (!isNullValue) {
       cv.isNull[0] = false;
@@ -153,8 +281,8 @@ private void evaluateDouble(VectorizedRowBatch vrg) {
     }
   }
 
-  private void evaluateBytes(VectorizedRowBatch vrg) {
-    BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum];
+  private void evaluateBytes(ColumnVector colVector) {
+    BytesColumnVector cv = (BytesColumnVector) colVector;
     cv.isRepeating = true;
     cv.initBuffer();
     if (!isNullValue) {
@@ -166,8 +294,8 @@ private void evaluateBytes(VectorizedRowBatch vrg) {
     }
   }
 
-  private void evaluateDecimal(VectorizedRowBatch vrg) {
-    DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum];
+  private void evaluateDecimal(ColumnVector colVector) {
+    DecimalColumnVector dcv = (DecimalColumnVector) colVector;
     dcv.isRepeating = true;
     if (!isNullValue) {
       dcv.isNull[0] = false;
@@ -178,8 +306,8 @@ private void evaluateDecimal(VectorizedRowBatch vrg) {
     }
   }
 
-  private void evaluateTimestamp(VectorizedRowBatch vrg) {
-    TimestampColumnVector tcv = (TimestampColumnVector) vrg.cols[outputColumnNum];
+  private void evaluateTimestamp(ColumnVector colVector) {
+    TimestampColumnVector tcv = (TimestampColumnVector) colVector;
     tcv.isRepeating = true;
     if (!isNullValue) {
       tcv.isNull[0] = false;
@@ -190,8 +318,8 @@ private void evaluateTimestamp(VectorizedRowBatch vrg) {
     }
   }
 
-  private void evaluateIntervalDayTime(VectorizedRowBatch vrg) {
-    IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum];
+  private void evaluateIntervalDayTime(ColumnVector colVector) {
+    IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) colVector;
     dcv.isRepeating = true;
     if (!isNullValue) {
       dcv.isNull[0] = false;
@@ -202,8 +330,23 @@ private void evaluateIntervalDayTime(VectorizedRowBatch vrg) {
     }
   }
 
-  private void evaluateVoid(VectorizedRowBatch vrg) {
-    VoidColumnVector voidColVector = (VoidColumnVector) vrg.cols[outputColumnNum];
+  private void evaluateStruct(ColumnVector colVector) {
+    StructColumnVector scv = (StructColumnVector) colVector;
+    scv.isRepeating = true;
+    if (!isNullValue) {
+      scv.isNull[0] = false;
+      final int size = structValue.length;
+      for (int i = 0; i < size; i++) {
+        structValue[i].evaluateColumn(scv.fields[i]);
+      }
+    } else {
+      scv.isNull[0] = true;
+      scv.noNulls = false;
+    }
+  }
+
+  private void evaluateVoid(ColumnVector colVector) {
+    VoidColumnVector voidColVector = (VoidColumnVector) colVector;
     voidColVector.isRepeating = true;
     voidColVector.isNull[0] = true;
     voidColVector.noNulls = false;
@@ -211,27 +354,34 @@ private void evaluateVoid(VectorizedRowBatch vrg) {
 
   @Override
   public void evaluate(VectorizedRowBatch vrg) {
+    evaluateColumn(vrg.cols[outputColumnNum]);
+  }
+
+  private void evaluateColumn(ColumnVector colVector) {
     switch (type) {
     case LONG:
-      evaluateLong(vrg);
+      evaluateLong(colVector);
       break;
     case DOUBLE:
-      evaluateDouble(vrg);
+      evaluateDouble(colVector);
      break;
    case BYTES:
-      evaluateBytes(vrg);
+      evaluateBytes(colVector);
      break;
    case DECIMAL:
-      evaluateDecimal(vrg);
+      evaluateDecimal(colVector);
      break;
    case TIMESTAMP:
-      evaluateTimestamp(vrg);
+      evaluateTimestamp(colVector);
      break;
    case INTERVAL_DAY_TIME:
-      evaluateIntervalDayTime(vrg);
+      evaluateIntervalDayTime(colVector);
+      break;
+    case STRUCT:
+      evaluateStruct(colVector);
      break;
    case VOID:
-      evaluateVoid(vrg);
+      evaluateVoid(colVector);
      break;
    default:
      throw new RuntimeException("Unexpected column vector type " + type);
@@ -287,6 +437,17 @@ public HiveIntervalDayTime getIntervalDayTimeValue() {
     return intervalDayTimeValue;
   }
 
+  public void setStructValue(Object structValue) throws HiveException {
+    StructTypeInfo structTypeInfo = (StructTypeInfo) outputTypeInfo;
+    ArrayList<TypeInfo> fieldTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
+    final int size = fieldTypeInfoList.size();
+    this.structValue = new ConstantVectorExpression[size];
+    List<Object> fieldValueList = (List<Object>) structValue;
+    for (int i = 0; i < size; i++) {
+      this.structValue[i] = create(i, fieldValueList.get(i), fieldTypeInfoList.get(i));
+    }
+  }
+
   @Override
   public String vectorExpressionParameters() {
     String value;
@@ -313,6 +474,24 @@ public String vectorExpressionParameters() {
     case INTERVAL_DAY_TIME:
       value = intervalDayTimeValue.toString();
       break;
+    case STRUCT:
+      {
+        StringBuilder sb = new StringBuilder();
+        sb.append("STRUCT {");
+        boolean isFirst = true;
+        final int size = structValue.length;
+        for (int i = 0; i < size; i++) {
+          if (isFirst) {
+            isFirst = false;
+          } else {
+            sb.append(", ");
+          }
+          sb.append(structValue[i].toString());
+        }
+        sb.append("}");
+        value = sb.toString();
+      }
+      break;
     default:
       throw new RuntimeException("Unknown vector column type " + type);
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java
new file mode 100644
index 0000000..5632cfb
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +/** + * Output a boolean value indicating if a column is IN a list of constants. + */ +public class Decimal64ColumnInList extends LongColumnInList { + + private static final long serialVersionUID = 1L; + + public Decimal64ColumnInList(int colNum, int outputColumnNum) { + super(colNum, outputColumnNum); + } + + public Decimal64ColumnInList() { + super(); + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) inputTypeInfos[0]; + final int scale = decimalTypeInfo.scale(); + HiveDecimalWritable writable = new HiveDecimalWritable(); + StringBuilder sb = new StringBuilder(); + sb.append(getColumnParamString(0, colNum)); + sb.append(", values ["); + for (long value : inListValues) { + writable.deserialize64(value, scale); + sb.append(", decimal64Val "); + sb.append(value); + sb.append(", decimalVal "); + sb.append(writable.toString()); + } + sb.append("]"); + return sb.toString(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + + // return null since this will be handled as a special case in VectorizationContext + return null; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java new file mode 100644 index 0000000..c26a93a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +public class FilterDecimal64ColumnBetween extends FilterLongColumnBetween { + + private static final long serialVersionUID = 1L; + + public FilterDecimal64ColumnBetween(int colNum, long leftValue, long rightValue) { + super(colNum, leftValue, rightValue); + } + + public FilterDecimal64ColumnBetween() { + super(); + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) inputTypeInfos[1]; + HiveDecimalWritable writable1 = new HiveDecimalWritable(); + writable1.deserialize64(leftValue, decimalTypeInfo1.scale()); + + DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) inputTypeInfos[2]; + HiveDecimalWritable writable2 = new HiveDecimalWritable(); + writable2.deserialize64(rightValue, decimalTypeInfo2.scale()); + return + getColumnParamString(0, colNum) + + ", decimal64LeftVal " + leftValue + ", decimalLeftVal " + writable1.toString() + + ", decimal64RightVal " + rightValue + ", decimalRightVal " + writable2.toString(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL_64, + VectorExpressionDescriptor.ArgumentType.DECIMAL_64, + VectorExpressionDescriptor.ArgumentType.DECIMAL_64) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnInList.java new file mode 100644 index 0000000..a75cdbf --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnInList.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +/** + * Evaluate IN filter on a batch for a vector of longs. 
+ */ +public class FilterDecimal64ColumnInList extends FilterLongColumnInList { + + private static final long serialVersionUID = 1L; + + public FilterDecimal64ColumnInList() { + super(); + } + + /** + * After construction you must call setInListValues() to add the values to the IN set. + */ + public FilterDecimal64ColumnInList(int colNum) { + super(colNum); + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. + return null; + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) inputTypeInfos[0]; + final int scale = decimalTypeInfo.scale(); + HiveDecimalWritable writable = new HiveDecimalWritable(); + StringBuilder sb = new StringBuilder(); + sb.append(getColumnParamString(0, inputCol)); + sb.append(", values ["); + for (long value : inListValues) { + writable.deserialize64(value, scale); + sb.append(", decimal64Val "); + sb.append(value); + sb.append(", decimalVal "); + sb.append(writable.toString()); + } + sb.append("]"); + return sb.toString(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnNotBetween.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnNotBetween.java new file mode 100644 index 0000000..13d5c1a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnNotBetween.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +public class FilterDecimal64ColumnNotBetween extends FilterLongColumnNotBetween { + + private static final long serialVersionUID = 1L; + + public FilterDecimal64ColumnNotBetween(int colNum, long leftValue, long rightValue) { + super(colNum, leftValue, rightValue); + } + + public FilterDecimal64ColumnNotBetween() { + super(); + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) inputTypeInfos[1]; + HiveDecimalWritable writable1 = new HiveDecimalWritable(); + writable1.deserialize64(leftValue, decimalTypeInfo1.scale()); + + DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) inputTypeInfos[2]; + HiveDecimalWritable writable2 = new HiveDecimalWritable(); + writable2.deserialize64(rightValue, decimalTypeInfo2.scale()); + return + getColumnParamString(0, colNum) + + ", decimal64LeftVal " + leftValue + ", decimalLeftVal " + writable1.toString() + + ", decimal64RightVal " + rightValue + ", decimalRightVal " + writable2.toString(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL_64, + VectorExpressionDescriptor.ArgumentType.DECIMAL_64, + VectorExpressionDescriptor.ArgumentType.DECIMAL_64) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java index 312a388..7306bbf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java @@ -36,8 +36,9 @@ public class FilterLongColumnInList extends VectorExpression implements ILongInExpr { private static final long serialVersionUID = 1L; - private final int inputCol; - private long[] inListValues; + + protected final int inputCol; + protected long[] inListValues; // Transient members initialized by transientInit method. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java index 8469882..d519141 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java @@ -33,8 +33,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long[] inListValues; + protected int colNum; + protected long[] inListValues; // The set object containing the IN list. This is optimized for lookup // of the data type of the column. 
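The two hunks above widen colNum, inputCol, and inListValues from private to protected so the new Decimal64ColumnInList / FilterDecimal64ColumnInList subclasses can reuse the long-based IN evaluation unchanged, overriding only display and descriptor logic: a Decimal64 value is just a scaled long. A minimal round-trip sketch of that encoding, assuming the storage-api serialize64/deserialize64 pair (deserialize64 appears in the hunks above; serialize64 and the class name here are assumptions, not part of this patch):

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

public class Decimal64RoundTripSketch {
  public static void main(String[] args) {
    // A decimal(7,2) value 12.34 in Decimal64 form is the scaled long 1234.
    HiveDecimalWritable writable = new HiveDecimalWritable(HiveDecimal.create("12.34"));
    long decimal64 = writable.serialize64(2);   // assumed helper: serialize64(scale) -> long
    // The Decimal64 IN/BETWEEN expressions compare such longs directly.
    HiveDecimalWritable decoded = new HiveDecimalWritable();
    decoded.deserialize64(decimal64, 2);        // as used by vectorExpressionParameters() for display
    System.out.println(decimal64 + " -> " + decoded);
  }
}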
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java index 55c2586..9328eb4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java @@ -170,6 +170,11 @@ public void setInListValues(byte [][] a) { @Override public String vectorExpressionParameters() { - return getColumnParamString(0, inputCol) + ", values " + Arrays.toString(inListValues); + StringBuilder sb = new StringBuilder(); + sb.append("col "); + sb.append(inputCol); + sb.append(", values "); + sb.append(displayArrayOfUtf8ByteArrays(inListValues)); + return sb.toString(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java deleted file mode 100644 index 8e5f9da..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -/** - * Vectorized version of TO_DATE(TIMESTAMP)/TO_DATE(DATE). - * As TO_DATE() now returns DATE type, this should be the same behavior as the DATE cast operator. 
- */ -public class VectorUDFDateLong extends CastLongToDate { - private static final long serialVersionUID = 1L; - - public VectorUDFDateLong() { - super(); - } - - public VectorUDFDateLong(int inputColumn, int outputColumnNum) { - super(inputColumn, outputColumnNum); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java index 69a2bef..af35ee6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec.vector.udf; import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -27,10 +29,20 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** * Descriptor for function argument. 
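The import block above pulls the standard-struct ObjectInspector machinery into VectorUDFArgDesc; the next hunk uses it to turn a constant struct into its Writable form for the UDF adaptor. A self-contained sketch of that plumbing, under the same APIs the hunk calls (class, field names, and values here are hypothetical, not the patch's code):

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class ConstantStructOISketch {
  public static void main(String[] args) {
    // Field layout of a constant struct<field0:bigint,field1:string>.
    List<String> fieldNames = Arrays.asList("field0", "field1");
    List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.writableLongObjectInspector,
        PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    // Field values must already be Writable, matching the inspectors above.
    List<Object> fieldValues = Arrays.<Object>asList(new LongWritable(42L), new Text("abc"));

    StandardConstantStructObjectInspector constantOI =
        ObjectInspectorFactory.getStandardConstantStructObjectInspector(
            fieldNames, fieldOIs, fieldValues);

    // This is the kind of writableValue the reworked prepareConstant() hands to the adaptor.
    System.out.println(constantOI.getWritableConstantValue());
  }
}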
@@ -73,13 +85,55 @@ public void setConstant(ExprNodeConstantDesc expr) { public void prepareConstant() { final Object writableValue; if (constExpr != null) { - PrimitiveCategory pc = ((PrimitiveTypeInfo) constExpr.getTypeInfo()) - .getPrimitiveCategory(); - - // Convert from Java to Writable - writableValue = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject( - constExpr.getValue()); + Object constantValue = constExpr.getValue(); + TypeInfo typeInfo = constExpr.getTypeInfo(); + ObjectInspector objectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo); + Category category = typeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + { + PrimitiveCategory pc = + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + + // Convert from Java to Writable + AbstractPrimitiveJavaObjectInspector primitiveJavaObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pc); + writableValue = + primitiveJavaObjectInspector.getPrimitiveWritableObject(constantValue); + } + break; + case STRUCT: + { + if (constantValue.getClass().isArray()) { + constantValue = java.util.Arrays.asList((Object[]) constantValue); + } + + StructObjectInspector structObjectInspector = + (StructObjectInspector) objectInspector; + List fields = structObjectInspector.getAllStructFieldRefs(); + List fieldNames = new ArrayList(fields.size()); + List fieldObjectInspectors = + new ArrayList(fields.size()); + for (StructField f : fields) { + fieldNames.add(f.getFieldName()); + fieldObjectInspectors.add( + ObjectInspectorUtils.getStandardObjectInspector( + f.getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE)); + } + + StandardConstantStructObjectInspector constantStructObjectInspector = + ObjectInspectorFactory.getStandardConstantStructObjectInspector( + fieldNames, + fieldObjectInspectors, + (List) constantValue); + writableValue = + constantStructObjectInspector.getWritableConstantValue(); + } + break; + default: + throw new RuntimeException("Unexpected category " + category); + } } else { writableValue = null; } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java index f5c4eb5..3c39af7e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java @@ -23,7 +23,6 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateString; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateTimestamp; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -48,7 +47,7 @@ extended = "Example:\n " + " > SELECT _FUNC_('2009-07-30 04:17:52') FROM src LIMIT 1;\n" + " '2009-07-30'") -@VectorizedExpressions({VectorUDFDateString.class, VectorUDFDateLong.class, VectorUDFDateTimestamp.class}) +@VectorizedExpressions({VectorUDFDateString.class, VectorUDFDateTimestamp.class}) public class GenericUDFDate extends GenericUDF { private transient TimestampConverter timestampConverter; private transient Converter textConverter; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java index b53ddcb..c309ffa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java @@ -21,7 +21,6 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDate; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -42,7 +41,7 @@ + "Example:\n " + " > SELECT CAST('2009-01-01' AS DATE) FROM src LIMIT 1;\n" + " '2009-01-01'") -@VectorizedExpressions({CastStringToDate.class, CastLongToDate.class, CastTimestampToDate.class}) +@VectorizedExpressions({CastStringToDate.class, CastTimestampToDate.class}) public class GenericUDFToDate extends GenericUDF { private transient PrimitiveObjectInspector argumentOI; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index b6ae7d2..dfbf9d4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -118,6 +118,8 @@ private List primitiveObjectInspectorList; + private List columnNames; + private StructObjectInspector rowStructObjectInspector; private List generationSpecList; @@ -159,20 +161,24 @@ public boolean getAddPadding() { OMIT_GENERATION, STRING_FAMILY, STRING_FAMILY_OTHER_TYPE_VALUE, - TIMESTAMP_MILLISECONDS + TIMESTAMP_MILLISECONDS, + VALUE_LIST } private final GenerationKind generationKind; private final TypeInfo typeInfo; private final TypeInfo sourceTypeInfo; private final StringGenerationOption stringGenerationOption; + private final List valueList; private GenerationSpec(GenerationKind generationKind, TypeInfo typeInfo, - TypeInfo sourceTypeInfo, StringGenerationOption stringGenerationOption) { + TypeInfo sourceTypeInfo, StringGenerationOption stringGenerationOption, + List valueList) { this.generationKind = generationKind; this.typeInfo = typeInfo; this.sourceTypeInfo = sourceTypeInfo; this.stringGenerationOption = stringGenerationOption; + this.valueList = valueList; } public GenerationKind getGenerationKind() { @@ -191,31 +197,40 @@ public StringGenerationOption getStringGenerationOption() { return stringGenerationOption; } + public List getValueList() { + return valueList; + } + public static GenerationSpec createSameType(TypeInfo typeInfo) { return new GenerationSpec( - GenerationKind.SAME_TYPE, typeInfo, null, null); + GenerationKind.SAME_TYPE, typeInfo, null, null, null); } public static GenerationSpec createOmitGeneration(TypeInfo typeInfo) { return new GenerationSpec( - GenerationKind.OMIT_GENERATION, typeInfo, null, null); + GenerationKind.OMIT_GENERATION, typeInfo, null, null, null); } public static GenerationSpec createStringFamily(TypeInfo typeInfo, StringGenerationOption stringGenerationOption) { return new GenerationSpec( - GenerationKind.STRING_FAMILY, typeInfo, null, stringGenerationOption); + GenerationKind.STRING_FAMILY, typeInfo, null, stringGenerationOption, null); } public static GenerationSpec createStringFamilyOtherTypeValue(TypeInfo typeInfo, TypeInfo otherTypeTypeInfo) 
{ return new GenerationSpec( - GenerationKind.STRING_FAMILY_OTHER_TYPE_VALUE, typeInfo, otherTypeTypeInfo, null); + GenerationKind.STRING_FAMILY_OTHER_TYPE_VALUE, typeInfo, otherTypeTypeInfo, null, null); } public static GenerationSpec createTimestampMilliseconds(TypeInfo typeInfo) { return new GenerationSpec( - GenerationKind.TIMESTAMP_MILLISECONDS, typeInfo, null, null); + GenerationKind.TIMESTAMP_MILLISECONDS, typeInfo, null, null, null); + } + + public static GenerationSpec createValueList(TypeInfo typeInfo, List valueList) { + return new GenerationSpec( + GenerationKind.VALUE_LIST, typeInfo, null, null, valueList); } } @@ -243,6 +258,10 @@ public static GenerationSpec createTimestampMilliseconds(TypeInfo typeInfo) { return primitiveTypeInfos; } + public List columnNames() { + return columnNames; + } + public StructObjectInspector rowStructObjectInspector() { return rowStructObjectInspector; } @@ -342,7 +361,7 @@ public void initGenerationSpecSchema(Random r, List generationSp "map" }; - private static String getRandomTypeName(Random random, SupportedTypes supportedTypes, + public static String getRandomTypeName(Random random, SupportedTypes supportedTypes, Set allowedTypeNameSet) { String typeName = null; @@ -370,7 +389,7 @@ public static String getDecoratedTypeName(Random random, String typeName) { return getDecoratedTypeName(random, typeName, null, null, 0, 1); } - private static String getDecoratedTypeName(Random random, String typeName, + public static String getDecoratedTypeName(Random random, String typeName, SupportedTypes supportedTypes, Set allowedTypeNameSet, int depth, int maxDepth) { depth++; @@ -421,7 +440,7 @@ private static String getDecoratedTypeName(Random random, String typeName, if (i > 0) { sb.append(","); } - sb.append("col"); + sb.append("field"); sb.append(i); sb.append(":"); sb.append(fieldTypeName); @@ -549,7 +568,7 @@ private void chooseSchema(SupportedTypes supportedTypes, Set allowedType allTypes = false; onlyOne = false; } else if (allowedTypeNameSet != null) { - columnCount = 1 + r.nextInt(20); + columnCount = 1 + r.nextInt(allowedTypeNameSet.size()); allTypes = false; onlyOne = false; } else { @@ -586,9 +605,9 @@ private void chooseSchema(SupportedTypes supportedTypes, Set allowedType primitiveCategories = new PrimitiveCategory[columnCount]; primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; primitiveObjectInspectorList = new ArrayList(columnCount); - List columnNames = new ArrayList(columnCount); + columnNames = new ArrayList(columnCount); for (int c = 0; c < columnCount; c++) { - columnNames.add(String.format("col%d", c)); + columnNames.add(String.format("col%d", c + 1)); final String typeName; DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE; @@ -902,6 +921,13 @@ public static Object randomStringFamily(Random random, TypeInfo typeInfo, object = longWritable; } break; + case VALUE_LIST: + { + List valueList = generationSpec.getValueList(); + final int valueCount = valueList.size(); + object = valueList.get(r.nextInt(valueCount)); + } + break; default: throw new RuntimeException("Unexpected generationKind " + generationKind); } @@ -1180,6 +1206,42 @@ public static Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeI } } + public static Object getWritableObject(TypeInfo typeInfo, + ObjectInspector objectInspector, Object object) { + + final Category category = typeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + return + getWritablePrimitiveObject( + (PrimitiveTypeInfo) typeInfo, + 
objectInspector, DataTypePhysicalVariation.NONE, object); + case STRUCT: + { + final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + final StandardStructObjectInspector structInspector = + (StandardStructObjectInspector) objectInspector; + final List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + final int size = fieldTypeInfos.size(); + final List structFields = + structInspector.getAllStructFieldRefs(); + + List input = (ArrayList) object; + List result = new ArrayList(size); + for (int i = 0; i < size; i++) { + final StructField structField = structFields.get(i); + final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i); + result.add( + getWritableObject( + fieldTypeInfo, structField.getFieldObjectInspector(), input.get(i))); + } + return result; + } + default: + throw new RuntimeException("Unexpected category " + category); + } + } + public static Object getNonWritablePrimitiveObject(Object object, TypeInfo typeInfo, ObjectInspector objectInspector) { @@ -1290,41 +1352,91 @@ public static Object getNonWritablePrimitiveObject(Object object, TypeInfo typeI } } + public static Object getNonWritableObject(Object object, TypeInfo typeInfo, + ObjectInspector objectInspector) { + final Category category = typeInfo.getCategory(); + switch (category) { + case PRIMITIVE: + return getNonWritablePrimitiveObject(object, typeInfo, objectInspector); + case STRUCT: + { + final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + final StandardStructObjectInspector structInspector = + (StandardStructObjectInspector) objectInspector; + final List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + final int size = fieldTypeInfos.size(); + final List structFields = + structInspector.getAllStructFieldRefs(); + + List input = (ArrayList) object; + List result = new ArrayList(size); + for (int i = 0; i < size; i++) { + final StructField structField = structFields.get(i); + final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i); + result.add( + getNonWritableObject(input.get(i), fieldTypeInfo, + structField.getFieldObjectInspector())); + } + return result; + } + default: + throw new RuntimeException("Unexpected category " + category); + } + } + public Object randomWritable(int column) { return randomWritable( - typeInfos[column], objectInspectorList.get(column), dataTypePhysicalVariations[column], + r, typeInfos[column], objectInspectorList.get(column), dataTypePhysicalVariations[column], allowNull); } public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector) { - return randomWritable(typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull); + return randomWritable(r, typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull); } public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, boolean allowNull) { - return randomWritable(typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull); + return randomWritable(r, typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull); } public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, DataTypePhysicalVariation dataTypePhysicalVariation, boolean allowNull) { + return randomWritable(r, typeInfo, objectInspector, dataTypePhysicalVariation, allowNull); + } + + public static Object randomWritable(Random random, TypeInfo typeInfo, + ObjectInspector objectInspector) { + return randomWritable( + random, typeInfo, objectInspector, DataTypePhysicalVariation.NONE, false); + } + + public static Object 
randomWritable(Random random, TypeInfo typeInfo, + ObjectInspector objectInspector, boolean allowNull) { + return randomWritable( + random, typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull); + } + + public static Object randomWritable(Random random, TypeInfo typeInfo, + ObjectInspector objectInspector, DataTypePhysicalVariation dataTypePhysicalVariation, + boolean allowNull) { switch (typeInfo.getCategory()) { case PRIMITIVE: { - if (allowNull && r.nextInt(20) == 0) { + if (allowNull && random.nextInt(20) == 0) { return null; } - final Object object = randomPrimitiveObject(r, (PrimitiveTypeInfo) typeInfo); + final Object object = randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo); return getWritablePrimitiveObject( (PrimitiveTypeInfo) typeInfo, objectInspector, dataTypePhysicalVariation, object); } case LIST: { - if (allowNull && r.nextInt(20) == 0) { + if (allowNull && random.nextInt(20) == 0) { return null; } // Always generate a list with at least 1 value? - final int elementCount = 1 + r.nextInt(100); + final int elementCount = 1 + random.nextInt(100); final StandardListObjectInspector listObjectInspector = (StandardListObjectInspector) objectInspector; final ObjectInspector elementObjectInspector = @@ -1345,7 +1457,8 @@ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, } final Object listObj = listObjectInspector.create(elementCount); for (int i = 0; i < elementCount; i++) { - final Object ele = randomWritable(elementTypeInfo, elementObjectInspector, allowNull); + final Object ele = randomWritable( + random, elementTypeInfo, elementObjectInspector, allowNull); // UNDONE: For now, a 1-element list with a null element is a null list... if (ele == null && elementCount == 1) { return null; @@ -1382,10 +1495,10 @@ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, } case MAP: { - if (allowNull && r.nextInt(20) == 0) { + if (allowNull && random.nextInt(20) == 0) { return null; } - final int keyPairCount = r.nextInt(100); + final int keyPairCount = random.nextInt(100); final StandardMapObjectInspector mapObjectInspector = (StandardMapObjectInspector) objectInspector; final ObjectInspector keyObjectInspector = @@ -1400,15 +1513,15 @@ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, valueObjectInspector); final Object mapObj = mapObjectInspector.create(); for (int i = 0; i < keyPairCount; i++) { - Object key = randomWritable(keyTypeInfo, keyObjectInspector); - Object value = randomWritable(valueTypeInfo, valueObjectInspector); + Object key = randomWritable(random, keyTypeInfo, keyObjectInspector); + Object value = randomWritable(random, valueTypeInfo, valueObjectInspector); mapObjectInspector.put(mapObj, key, value); } return mapObj; } case STRUCT: { - if (allowNull && r.nextInt(20) == 0) { + if (allowNull && random.nextInt(20) == 0) { return null; } final StandardStructObjectInspector structObjectInspector = @@ -1423,7 +1536,7 @@ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, final TypeInfo fieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector( fieldObjectInspector); - final Object fieldObj = randomWritable(fieldTypeInfo, fieldObjectInspector); + final Object fieldObj = randomWritable(random, fieldTypeInfo, fieldObjectInspector); structObjectInspector.setStructFieldData(structObj, fieldRef, fieldObj); } return structObj; @@ -1434,13 +1547,13 @@ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, 
(StandardUnionObjectInspector) objectInspector; final List objectInspectorList = unionObjectInspector.getObjectInspectors(); final int unionCount = objectInspectorList.size(); - final byte tag = (byte) r.nextInt(unionCount); + final byte tag = (byte) random.nextInt(unionCount); final ObjectInspector fieldObjectInspector = objectInspectorList.get(tag); final TypeInfo fieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector( fieldObjectInspector); - final Object fieldObj = randomWritable(fieldTypeInfo, fieldObjectInspector, false); + final Object fieldObj = randomWritable(random, fieldTypeInfo, fieldObjectInspector, false); if (fieldObj == null) { throw new RuntimeException(); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java index a09daf3..1b61071 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmetic.java @@ -89,7 +89,7 @@ public TestVectorArithmetic() { // Arithmetic operations rely on getting conf from SessionState, need to initialize here. SessionState ss = new SessionState(new HiveConf()); - ss.getConf().setVar(HiveConf.ConfVars.HIVE_COMPAT, "latest"); + ss.getConf().setVar(HiveConf.ConfVars.HIVE_COMPAT, "default"); SessionState.setCurrentSessionState(ss); } @@ -364,7 +364,7 @@ private void doTestsWithDiffColumnScalar(Random random, TypeInfo typeInfo1, Type new ArrayList(); List columns = new ArrayList(); - int columnNum = 0; + int columnNum = 1; ExprNodeDesc col1Expr; Object scalar1Object = null; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java new file mode 100644 index 0000000..3f1a137 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java @@ -0,0 +1,1014 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.SupportedTypes; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.TestVectorArithmetic.ColumnScalarMode; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; 
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.io.WritableComparable; + +import junit.framework.Assert; + +import org.junit.Ignore; +import org.junit.Test; + +public class TestVectorBetweenIn { + + @Test + public void testTinyInt() throws Exception { + Random random = new Random(5371); + + doBetweenIn(random, "tinyint"); + } + + @Test + public void testSmallInt() throws Exception { + Random random = new Random(2772); + + doBetweenIn(random, "smallint"); + } + + @Test + public void testInt() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "int"); + } + + @Test + public void testBigInt() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "bigint"); + } + + @Test + public void testString() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "string"); + } + + @Test + public void testTimestamp() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "timestamp"); + } + + @Test + public void testDate() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "date"); + } + + @Test + public void testFloat() throws Exception { + Random random = new Random(7322); + + doBetweenIn(random, "float"); + } + + @Test + public void testDouble() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "double"); + } + + @Test + public void testChar() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "char(10)"); + } + + @Test + public void testVarchar() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "varchar(15)"); + } + + @Test + public void testDecimal() throws Exception { + Random random = new Random(9300); + + doDecimalTests(random, /* tryDecimal64 */ false); + } + + @Test + public void testDecimal64() throws Exception { + Random random = new Random(9300); + + doDecimalTests(random, /* tryDecimal64 */ true); + } + + @Test + public void testStruct() throws Exception { + Random random = new Random(9300); + + doStructTests(random); + } + + public enum BetweenInTestMode { + ROW_MODE, + ADAPTOR, + VECTOR_EXPRESSION; + + static final int count = values().length; + } + + public enum BetweenInVariation { + FILTER_BETWEEN, + FILTER_NOT_BETWEEN, + PROJECTION_BETWEEN, + PROJECTION_NOT_BETWEEN, + FILTER_IN, + PROJECTION_IN; + + static final int count = values().length; + + final boolean isFilter; + BetweenInVariation() { + isFilter = name().startsWith("FILTER"); + } + } + + private static TypeInfo[] decimalTypeInfos = new TypeInfo[] { + new DecimalTypeInfo(38, 18), + new DecimalTypeInfo(25, 2), + new DecimalTypeInfo(19, 4), + new DecimalTypeInfo(18, 10), + new DecimalTypeInfo(17, 3), + new DecimalTypeInfo(12, 2), + new DecimalTypeInfo(7, 1) + }; + + private void doDecimalTests(Random random, boolean tryDecimal64) + throws Exception { + for (TypeInfo typeInfo : decimalTypeInfos) { + doBetweenIn( + random, typeInfo.getTypeName(), tryDecimal64); + } + } + + private void doBetweenIn(Random random, String typeName) + throws Exception { + doBetweenIn(random, typeName, /* tryDecimal64 */ false); + } + + private static final BetweenInVariation[] structInVarations = + new BetweenInVariation[] { BetweenInVariation.FILTER_IN, BetweenInVariation.PROJECTION_IN }; + + private void 
doStructTests(Random random) throws Exception { + + String typeName = "struct"; + + // These are the only types supported for STRUCT IN by the VectorizationContext class. + Set allowedTypeNameSet = new HashSet(); + allowedTypeNameSet.add("int"); + allowedTypeNameSet.add("bigint"); + allowedTypeNameSet.add("double"); + allowedTypeNameSet.add("string"); + + // Only IN is currently supported for STRUCT types. + for (BetweenInVariation betweenInVariation : structInVarations) { + + for (int i = 0; i < 4; i++) { + final String structTypeName = + VectorRandomRowSource.getDecoratedTypeName( + random, typeName, SupportedTypes.ALL, allowedTypeNameSet, + /* depth */ 0, /* maxDepth */ 1); + + doBetweenStructInVariation( + random, structTypeName, betweenInVariation); + } + } + } + + private void doBetweenIn(Random random, String typeName, boolean tryDecimal64) + throws Exception { + + int subVariation; + for (BetweenInVariation betweenInVariation : BetweenInVariation.values()) { + subVariation = 0; + while (true) { + if (!doBetweenInVariation( + random, typeName, tryDecimal64, betweenInVariation, subVariation)) { + break; + } + subVariation++; + } + } + } + + private boolean checkDecimal64(boolean tryDecimal64, TypeInfo typeInfo) { + if (!tryDecimal64 || !(typeInfo instanceof DecimalTypeInfo)) { + return false; + } + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo; + boolean result = HiveDecimalWritable.isPrecisionDecimal64(decimalTypeInfo.getPrecision()); + return result; + } + + private void removeValue(List valueList, Object value) { + valueList.remove(value); + } + + private boolean needsValidDataTypeData(TypeInfo typeInfo) { + if (!(typeInfo instanceof PrimitiveTypeInfo)) { + return false; + } + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + if (primitiveCategory == PrimitiveCategory.STRING || + primitiveCategory == PrimitiveCategory.CHAR || + primitiveCategory == PrimitiveCategory.VARCHAR || + primitiveCategory == PrimitiveCategory.BINARY) { + return false; + } + return true; + } + + private boolean doBetweenInVariation(Random random, String typeName, + boolean tryDecimal64, BetweenInVariation betweenInVariation, int subVariation) + throws Exception { + + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + + boolean isDecimal64 = checkDecimal64(tryDecimal64, typeInfo); + DataTypePhysicalVariation dataTypePhysicalVariation = + (isDecimal64 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE); + final int decimal64Scale = + (isDecimal64 ? 
((DecimalTypeInfo) typeInfo).getScale() : 0); + + //---------------------------------------------------------------------------------------------- + + ObjectInspector objectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + typeInfo); + + final int valueCount = 10 + random.nextInt(10); + List valueList = new ArrayList(valueCount); + for (int i = 0; i < valueCount; i++) { + valueList.add( + VectorRandomRowSource.randomWritable( + random, typeInfo, objectInspector, dataTypePhysicalVariation, /* allowNull */ false)); + } + + final boolean isBetween = + (betweenInVariation == BetweenInVariation.FILTER_BETWEEN || + betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN || + betweenInVariation == BetweenInVariation.PROJECTION_BETWEEN || + betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN); + + List compareList = new ArrayList(); + + List sortedList = new ArrayList(valueCount); + sortedList.addAll(valueList); + + Object object = valueList.get(0); + WritableComparator writableComparator = + WritableComparator.get((Class) object.getClass()); + sortedList.sort(writableComparator); + + final boolean isInvert; + if (isBetween) { + + // FILTER_BETWEEN + // FILTER_NOT_BETWEEN + // PROJECTION_BETWEEN + // PROJECTION_NOT_BETWEEN + isInvert = + (betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN || + betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN); + switch (subVariation) { + case 0: + // Range covers all values exactly. + compareList.add(sortedList.get(0)); + compareList.add(sortedList.get(valueCount - 1)); + break; + case 1: + // Exclude the first and last sorted. + compareList.add(sortedList.get(1)); + compareList.add(sortedList.get(valueCount - 2)); + break; + case 2: + // Only last 2 sorted. + compareList.add(sortedList.get(valueCount - 2)); + compareList.add(sortedList.get(valueCount - 1)); + break; + case 3: + case 4: + case 5: + case 6: + { + // Choose 2 adjacent in the middle. + Object min = sortedList.get(5); + Object max = sortedList.get(6); + compareList.add(min); + compareList.add(max); + if (subVariation == 4) { + removeValue(valueList, min); + } else if (subVariation == 5) { + removeValue(valueList, max); + } else if (subVariation == 6) { + removeValue(valueList, min); + removeValue(valueList, max); + } + } + break; + default: + return false; + } + } else { + + // FILTER_IN. + // PROJECTION_IN. + isInvert = false; + switch (subVariation) { + case 0: + // All values. + compareList.addAll(valueList); + break; + case 1: + // Don't include the first and last sorted. + for (int i = 1; i < valueCount - 1; i++) { + compareList.add(valueList.get(i)); + } + break; + case 2: + // The even ones. + for (int i = 2; i < valueCount; i += 2) { + compareList.add(valueList.get(i)); + } + break; + case 3: + case 4: + case 5: + case 6: + { + // Choose 2 adjacent in the middle.
+          Object min = sortedList.get(5);
+          Object max = sortedList.get(6);
+          compareList.add(min);
+          compareList.add(max);
+        }
+        break;
+      default:
+        return false;
+      }
+    }
+
+    //----------------------------------------------------------------------------------------------
+
+    GenerationSpec generationSpec = GenerationSpec.createValueList(typeInfo, valueList);
+
+    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
+        new ArrayList<DataTypePhysicalVariation>();
+    generationSpecList.add(generationSpec);
+    explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);
+
+    VectorRandomRowSource rowSource = new VectorRandomRowSource();
+
+    rowSource.initGenerationSpecSchema(
+        random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
+        explicitDataTypePhysicalVariationList);
+
+    List<String> columns = new ArrayList<String>();
+    String col1Name = rowSource.columnNames().get(0);
+    columns.add(col1Name);
+    final ExprNodeDesc col1Expr = new ExprNodeColumnDesc(typeInfo, col1Name, "table", false);
+
+    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+    if (isBetween) {
+      children.add(new ExprNodeConstantDesc(Boolean.valueOf(isInvert)));
+    }
+    children.add(col1Expr);
+    for (Object compareObject : compareList) {
+      ExprNodeConstantDesc constDesc =
+          new ExprNodeConstantDesc(
+              typeInfo,
+              VectorRandomRowSource.getNonWritableObject(
+                  compareObject, typeInfo, objectInspector));
+      children.add(constDesc);
+    }
+
+    String[] columnNames = columns.toArray(new String[0]);
+
+    Object[][] randomRows = rowSource.randomRows(100000);
+
+    VectorRandomBatchSource batchSource =
+        VectorRandomBatchSource.createInterestingBatches(
+            random,
+            rowSource,
+            randomRows,
+            null);
+
+    final GenericUDF udf;
+    final ObjectInspector outputObjectInspector;
+    if (isBetween) {
+
+      udf = new GenericUDFBetween();
+
+      // Argument 0 is the invert boolean constant; argument 1 is the column being tested;
+      // arguments 2 and 3 are the range limits.
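+      /*
+       * Shape of the expression tree being wired up (illustrative sketch only;
+       * the child order matches the children list built above):
+       *
+       *   GenericUDFBetween(invertConst, col, leftConst, rightConst)
+       *
+       *   col BETWEEN 3 AND 7       ==>  invertConst = false
+       *   col NOT BETWEEN 3 AND 7   ==>  invertConst = true
+       */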
+ ObjectInspector[] argumentOIs = new ObjectInspector[4]; + argumentOIs[0] = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + argumentOIs[1] = objectInspector; + argumentOIs[2] = objectInspector; + argumentOIs[3] = objectInspector; + outputObjectInspector = udf.initialize(argumentOIs); + } else { + final int compareCount = compareList.size(); + udf = new GenericUDFIn(); + ObjectInspector[] argumentOIs = new ObjectInspector[compareCount]; + ConstantObjectInspector constantObjectInspector = + (ConstantObjectInspector) children.get(1).getWritableObjectInspector(); + for (int i = 0; i < compareCount; i++) { + argumentOIs[i] = constantObjectInspector; + } + outputObjectInspector = udf.initialize(argumentOIs); + } + + TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, udf, children); + + return executeTestModesAndVerify( + typeInfo, betweenInVariation, compareList, columns, columnNames, children, + udf, exprDesc, + randomRows, rowSource, batchSource, outputTypeInfo, + /* skipAdaptor */ false); + } + + private boolean doBetweenStructInVariation(Random random, String structTypeName, + BetweenInVariation betweenInVariation) + throws Exception { + + StructTypeInfo structTypeInfo = + (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(structTypeName); + + ObjectInspector structObjectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + structTypeInfo); + + final int valueCount = 10 + random.nextInt(10); + List valueList = new ArrayList(valueCount); + for (int i = 0; i < valueCount; i++) { + valueList.add( + VectorRandomRowSource.randomWritable( + random, structTypeInfo, structObjectInspector, DataTypePhysicalVariation.NONE, + /* allowNull */ false)); + } + + final boolean isInvert = false; + + // No convenient WritableComparator / WritableComparable available for STRUCT. 
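+    /*
+     * Since the struct values cannot be sorted, the IN list below is built by
+     * sampling distinct random indices rather than slicing a sorted list.
+     * Equivalent sketch of the selection loop that follows:
+     *
+     *   Set<Integer> picked = new HashSet<Integer>();
+     *   while (picked.size() < chooseLimit) {
+     *     picked.add(random.nextInt(valueCount));
+     *   }
+     */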
+ List compareList = new ArrayList(); + + Set includedSet = new HashSet(); + final int chooseLimit = 4 + random.nextInt(valueCount/2); + int chooseCount = 0; + while (chooseCount < chooseLimit) { + final int index = random.nextInt(valueCount); + if (includedSet.contains(index)) { + continue; + } + includedSet.add(index); + compareList.add(valueList.get(index)); + chooseCount++; + } + + //---------------------------------------------------------------------------------------------- + + GenerationSpec structGenerationSpec = GenerationSpec.createValueList(structTypeInfo, valueList); + + List structGenerationSpecList = new ArrayList(); + List structExplicitDataTypePhysicalVariationList = + new ArrayList(); + structGenerationSpecList.add(structGenerationSpec); + structExplicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + + VectorRandomRowSource structRowSource = new VectorRandomRowSource(); + + structRowSource.initGenerationSpecSchema( + random, structGenerationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + structExplicitDataTypePhysicalVariationList); + + Object[][] structRandomRows = structRowSource.randomRows(100000); + + // --------------------------------------------------------------------------------------------- + + List generationSpecList = new ArrayList(); + List explicitDataTypePhysicalVariationList = + new ArrayList(); + + List fieldTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos(); + final int fieldCount = fieldTypeInfoList.size(); + for (int i = 0; i < fieldCount; i++) { + GenerationSpec generationSpec = GenerationSpec.createOmitGeneration(fieldTypeInfoList.get(i)); + generationSpecList.add(generationSpec); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + } + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + explicitDataTypePhysicalVariationList); + + Object[][] randomRows = rowSource.randomRows(100000); + + final int rowCount = randomRows.length; + for (int r = 0; r < rowCount; r++) { + List fieldValueList = (ArrayList) structRandomRows[r][0]; + for (int f = 0; f < fieldCount; f++) { + randomRows[r][f] = fieldValueList.get(f); + } + } + + // --------------------------------------------------------------------------------------------- + + // Currently, STRUCT IN vectorization assumes a GenericUDFStruct. + + List structUdfObjectInspectorList = new ArrayList(); + List structUdfChildren = new ArrayList(fieldCount); + List rowColumnNameList = rowSource.columnNames(); + for (int i = 0; i < fieldCount; i++) { + TypeInfo fieldTypeInfo = fieldTypeInfoList.get(i); + ExprNodeColumnDesc fieldExpr = + new ExprNodeColumnDesc( + fieldTypeInfo, rowColumnNameList.get(i), "table", false); + structUdfChildren.add(fieldExpr); + ObjectInspector fieldObjectInspector = + VectorRandomRowSource.getObjectInspector(fieldTypeInfo, DataTypePhysicalVariation.NONE); + structUdfObjectInspectorList.add(fieldObjectInspector); + } + StandardStructObjectInspector structUdfObjectInspector = + ObjectInspectorFactory. 
+ getStandardStructObjectInspector(rowColumnNameList, structUdfObjectInspectorList); + String structUdfTypeName = structUdfObjectInspector.getTypeName(); + TypeInfo structUdfTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(structUdfTypeName); + + String structFuncText = "struct"; + FunctionInfo fi = FunctionRegistry.getFunctionInfo(structFuncText); + GenericUDF genericUDF = fi.getGenericUDF(); + ExprNodeDesc col1Expr = + new ExprNodeGenericFuncDesc( + structUdfObjectInspector, genericUDF, structFuncText, structUdfChildren); + + // --------------------------------------------------------------------------------------------- + + List columns = new ArrayList(); + + List children = new ArrayList(); + children.add(col1Expr); + for (int i = 0; i < compareList.size(); i++) { + Object compareObject = compareList.get(i); + ExprNodeConstantDesc constDesc = + new ExprNodeConstantDesc( + structUdfTypeInfo, + VectorRandomRowSource.getNonWritableObject( + compareObject, structUdfTypeInfo, structUdfObjectInspector)); + children.add(constDesc); + } + + for (int i = 0; i < fieldCount; i++) { + columns.add(rowColumnNameList.get(i)); + } + + String[] columnNames = columns.toArray(new String[0]); + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + rowSource, + randomRows, + null); + + // --------------------------------------------------------------------------------------------- + + final GenericUDF udf = new GenericUDFIn(); + final int compareCount = compareList.size(); + ObjectInspector[] argumentOIs = new ObjectInspector[compareCount]; + for (int i = 0; i < compareCount; i++) { + argumentOIs[i] = structUdfObjectInspector; + } + final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs); + + TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, udf, children); + + return executeTestModesAndVerify( + structUdfTypeInfo, betweenInVariation, compareList, columns, columnNames, children, + udf, exprDesc, + randomRows, rowSource, batchSource, outputTypeInfo, + /* skipAdaptor */ true); + } + + private boolean executeTestModesAndVerify(TypeInfo typeInfo, + BetweenInVariation betweenInVariation, List compareList, + List columns, String[] columnNames, List children, + GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, + Object[][] randomRows, + VectorRandomRowSource rowSource, VectorRandomBatchSource batchSource, + TypeInfo outputTypeInfo, boolean skipAdaptor) + throws Exception { + + final int rowCount = randomRows.length; + Object[][] resultObjectsArray = new Object[BetweenInTestMode.count][]; + for (int i = 0; i < BetweenInTestMode.count; i++) { + + Object[] resultObjects = new Object[rowCount]; + resultObjectsArray[i] = resultObjects; + + BetweenInTestMode betweenInTestMode = BetweenInTestMode.values()[i]; + switch (betweenInTestMode) { + case ROW_MODE: + if (!doRowCastTest( + typeInfo, + betweenInVariation, + compareList, + columns, + children, + udf, exprDesc, + randomRows, + rowSource.rowStructObjectInspector(), + resultObjects)) { + return false; + } + break; + case ADAPTOR: + if (skipAdaptor) { + continue; + } + case VECTOR_EXPRESSION: + if (!doVectorCastTest( + typeInfo, + betweenInVariation, + compareList, + columns, + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + children, + udf, exprDesc, + betweenInTestMode, + batchSource, + 
exprDesc.getWritableObjectInspector(),
+            outputTypeInfo,
+            resultObjects)) {
+          return false;
+        }
+        break;
+      default:
+        throw new RuntimeException("Unexpected BETWEEN/IN test mode " + betweenInTestMode);
+      }
+    }
+
+    for (int i = 0; i < rowCount; i++) {
+      // Row-mode is the expected value.
+      Object expectedResult = resultObjectsArray[0][i];
+
+      for (int v = 1; v < BetweenInTestMode.count; v++) {
+        BetweenInTestMode betweenInTestMode = BetweenInTestMode.values()[v];
+        if (skipAdaptor && betweenInTestMode == BetweenInTestMode.ADAPTOR) {
+          // Only the adaptor run was skipped; still verify the other vector modes.
+          continue;
+        }
+        Object vectorResult = resultObjectsArray[v][i];
+        if (betweenInVariation.isFilter &&
+            expectedResult == null &&
+            vectorResult != null) {
+          // This is OK: a filter may simply drop a NULL row. It must not keep it, though.
+          boolean vectorBoolean = ((BooleanWritable) vectorResult).get();
+          if (vectorBoolean) {
+            Assert.fail(
+                "Row " + i +
+                " typeName " + typeInfo.getTypeName() +
+                " outputTypeName " + outputTypeInfo.getTypeName() +
+                " " + betweenInVariation +
+                " " + betweenInTestMode +
+                " result is NOT NULL and true" +
+                " does not match row-mode expected result is NULL which means false here" +
+                " row values " + Arrays.toString(randomRows[i]) +
+                " exprDesc " + exprDesc.toString());
+          }
+        } else if (expectedResult == null || vectorResult == null) {
+          if (expectedResult != null || vectorResult != null) {
+            Assert.fail(
+                "Row " + i +
+                " sourceTypeName " + typeInfo.getTypeName() +
+                " " + betweenInVariation +
+                " " + betweenInTestMode +
+                " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) +
+                " does not match row-mode expected result is NULL " +
+                (expectedResult == null ? "YES" : "NO result " + expectedResult.toString()) +
+                " row values " + Arrays.toString(randomRows[i]) +
+                " exprDesc " + exprDesc.toString());
+          }
+        } else {
+
+          if (!expectedResult.equals(vectorResult)) {
+            Assert.fail(
+                "Row " + i +
+                " sourceTypeName " + typeInfo.getTypeName() +
+                " " + betweenInVariation +
+                " " + betweenInTestMode +
+                " result " + vectorResult.toString() +
+                " (" + vectorResult.getClass().getSimpleName() + ")" +
+                " does not match row-mode expected result " + expectedResult.toString() +
+                " (" + expectedResult.getClass().getSimpleName() + ")" +
+                " row values " + Arrays.toString(randomRows[i]) +
+                " exprDesc " + exprDesc.toString());
+          }
+        }
+      }
+    }
+    return true;
+  }
+
+  private boolean doRowCastTest(TypeInfo typeInfo,
+      BetweenInVariation betweenInVariation, List<Object> compareList,
+      List<String> columns, List<ExprNodeDesc> children,
+      GenericUDF udf, ExprNodeGenericFuncDesc exprDesc,
+      Object[][] randomRows,
+      ObjectInspector rowInspector, Object[] resultObjects)
+      throws Exception {
+
+    /*
+    System.out.println(
+        "*DEBUG* typeInfo " + typeInfo.toString() +
+        " betweenInTestMode ROW_MODE" +
+        " exprDesc " + exprDesc.toString());
+    */
+
+    HiveConf hiveConf = new HiveConf();
+    ExprNodeEvaluator evaluator =
+        ExprNodeEvaluatorFactory.get(exprDesc, hiveConf);
+
+    evaluator.initialize(rowInspector);
+
+    final int rowCount = randomRows.length;
+    for (int i = 0; i < rowCount; i++) {
+      Object[] row = randomRows[i];
+      Object result = evaluator.evaluate(row);
+      Object copyResult =
+          ObjectInspectorUtils.copyToStandardObject(
+              result, PrimitiveObjectInspectorFactory.writableBooleanObjectInspector,
+              ObjectInspectorCopyOption.WRITABLE);
+      resultObjects[i] = copyResult;
+    }
+
+    return true;
+  }
+
+  private void extractResultObjects(VectorizedRowBatch batch, int rowIndex,
+      VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow,
+      ObjectInspector objectInspector, Object[] resultObjects) {
+
+    boolean
selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); + + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[rowIndex++] = copyResult; + } + } + + private boolean doVectorCastTest(TypeInfo typeInfo, + BetweenInVariation betweenInVariation, List compareList, + List columns, String[] columnNames, + TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, + List children, + GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, + BetweenInTestMode betweenInTestMode, + VectorRandomBatchSource batchSource, + ObjectInspector objectInspector, + TypeInfo outputTypeInfo, Object[] resultObjects) + throws Exception { + + HiveConf hiveConf = new HiveConf(); + if (betweenInTestMode == BetweenInTestMode.ADAPTOR) { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); + } + + final boolean isFilter = betweenInVariation.isFilter; + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = + vectorizationContext.getVectorExpression(exprDesc, + (isFilter ? + VectorExpressionDescriptor.Mode.FILTER : + VectorExpressionDescriptor.Mode.PROJECTION)); + vectorExpression.transientInit(); + + if (betweenInTestMode == BetweenInTestMode.VECTOR_EXPRESSION && + vectorExpression instanceof VectorUDFAdaptor) { + System.out.println( + "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + + " betweenInTestMode " + betweenInTestMode + + " betweenInVariation " + betweenInVariation + + " vectorExpression " + vectorExpression.toString()); + } + + // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " betweenInTestMode " + betweenInTestMode + + " betweenInVariation " + betweenInVariation + + " vectorExpression " + vectorExpression.toString()); + */ + + VectorRandomRowSource rowSource = batchSource.getRowSource(); + VectorizedRowBatchCtx batchContext = + new VectorizedRowBatchCtx( + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + vectorizationContext.getScratchColumnTypeNames(), + vectorizationContext.getScratchDataTypePhysicalVariations()); + + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorExtractRow resultVectorExtractRow = null; + Object[] scrqtchRow = null; + if (!isFilter) { + resultVectorExtractRow = new VectorExtractRow(); + final int outputColumnNum = vectorExpression.getOutputColumnNum(); + resultVectorExtractRow.init( + new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum }); + scrqtchRow = new Object[1]; + } + + boolean copySelectedInUse = false; + int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + + batchSource.resetBatchIteration(); + int rowIndex = 0; + while (true) { + if (!batchSource.fillNextBatch(batch)) { + break; + } + final int originalBatchSize = batch.size; + if (isFilter) { + copySelectedInUse = batch.selectedInUse; + if 
(batch.selectedInUse) { + System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize); + } + } + + // In filter mode, the batch size can be made smaller. + vectorExpression.evaluate(batch); + + if (!isFilter) { + extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, + objectInspector, resultObjects); + } else { + final int currentBatchSize = batch.size; + if (copySelectedInUse && batch.selectedInUse) { + int selectIndex = 0; + for (int i = 0; i < originalBatchSize; i++) { + final int originalBatchIndex = copySelected[i]; + final boolean booleanResult; + if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) { + booleanResult = true; + selectIndex++; + } else { + booleanResult = false; + } + resultObjects[rowIndex + i] = new BooleanWritable(booleanResult); + } + } else if (batch.selectedInUse) { + int selectIndex = 0; + for (int i = 0; i < originalBatchSize; i++) { + final boolean booleanResult; + if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) { + booleanResult = true; + selectIndex++; + } else { + booleanResult = false; + } + resultObjects[rowIndex + i] = new BooleanWritable(booleanResult); + } + } else if (currentBatchSize == 0) { + // Whole batch got zapped. + for (int i = 0; i < originalBatchSize; i++) { + resultObjects[rowIndex + i] = new BooleanWritable(false); + } + } else { + // Every row kept. + for (int i = 0; i < originalBatchSize; i++) { + resultObjects[rowIndex + i] = new BooleanWritable(true); + } + } + } + + rowIndex += originalBatchSize; + } + + return true; + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java index d4d8ef7..cc1415a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java @@ -189,9 +189,14 @@ private void doIfTests(Random random, String typeName, for (PrimitiveCategory targetPrimitiveCategory : PrimitiveCategory.values()) { + if (targetPrimitiveCategory == PrimitiveCategory.INTERVAL_YEAR_MONTH || + targetPrimitiveCategory == PrimitiveCategory.INTERVAL_DAY_TIME) { + if (primitiveCategory != PrimitiveCategory.STRING) { + continue; + } + } + if (targetPrimitiveCategory == PrimitiveCategory.VOID || - targetPrimitiveCategory == PrimitiveCategory.INTERVAL_YEAR_MONTH || - targetPrimitiveCategory == PrimitiveCategory.INTERVAL_DAY_TIME || targetPrimitiveCategory == PrimitiveCategory.TIMESTAMPLOCALTZ || targetPrimitiveCategory == PrimitiveCategory.UNKNOWN) { continue; @@ -273,7 +278,8 @@ private void doIfTestOneCast(Random random, String typeName, } List generationSpecList = new ArrayList(); - List explicitDataTypePhysicalVariationList = new ArrayList(); + List explicitDataTypePhysicalVariationList = + new ArrayList(); generationSpecList.add(generationSpec); explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation); @@ -284,8 +290,8 @@ private void doIfTestOneCast(Random random, String typeName, explicitDataTypePhysicalVariationList); List columns = new ArrayList(); - columns.add("col0"); - ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col0", "table", false); + columns.add("col1"); + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col1", "table", false); List children = new ArrayList(); children.add(col1Expr); @@ -443,7 +449,12 @@ private void 
extractResultObjects(VectorizedRowBatch batch, int rowIndex,
     int[] selected = batch.selected;
     for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) {
       final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex);
+
+      try {
         resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow);
+      } catch (Exception e) {
+        // Do not swallow extraction failures; surface them with enough context to debug.
+        throw new RuntimeException(
+            "extractRow failed for batchIndex " + batchIndex, e);
+      }
 
       // UNDONE: Need to copy the object.
       resultObjects[rowIndex++] = scrqtchRow[0];
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
index 4dc01be..68c14c8 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
@@ -181,7 +181,7 @@ private void doDateAddSubTestsWithDiffColumnScalar(Random random, String dateTim
         new ArrayList();
 
     List columns = new ArrayList();
-    int columnNum = 0;
+    int columnNum = 1;
     ExprNodeDesc col1Expr;
     if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
         columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
@@ -253,8 +253,8 @@ private void doDateAddSubTestsWithDiffColumnScalar(Random random, String dateTim
       // Fixup numbers to limit the range to 0 ... N-1.
       for (int i = 0; i < randomRows.length; i++) {
         Object[] row = randomRows[i];
-        if (row[columnNum - 1] != null) {
-          row[columnNum - 1] =
+        if (row[columnNum - 2] != null) {
+          row[columnNum - 2] =
             smallerRange(
                 random, integerPrimitiveCategory, /* wantWritable */ true);
         }
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
index c5c5c72..0da9d8c 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
@@ -179,7 +179,7 @@ private void doDateDiffTestsWithDiffColumnScalar(Random random, String dateTimeS
         new ArrayList();
 
     List columns = new ArrayList();
-    int columnNum = 0;
+    int columnNum = 1;
     ExprNodeDesc col1Expr;
     if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
         columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterCompare.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterCompare.java
index 1ff11ec..ba9eaca 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterCompare.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterCompare.java
@@ -329,7 +329,7 @@ private void doTestsWithDiffColumnScalar(Random random, TypeInfo typeInfo1, Type
         new ArrayList();
 
     List columns = new ArrayList();
-    int columnNum = 0;
+    int columnNum = 1;
     ExprNodeDesc col1Expr;
 
     Object scalar1Object = null;
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java
index e7884b2..9d57aec 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java
@@ -657,7 +657,7 @@ private void validateDate(VectorizedRowBatch batch, PrimitiveCategory colType,
     } else if (colType
== PrimitiveCategory.TIMESTAMP) { udf = new VectorUDFDateTimestamp(0, 1); } else { - udf = new VectorUDFDateLong(0, 1); + throw new RuntimeException("Unexpected column type " + colType); } udf.setInputTypeInfos(new TypeInfo[] {primitiveCategoryToTypeInfo(colType)}); @@ -684,6 +684,9 @@ private void validateDate(VectorizedRowBatch batch, PrimitiveCategory colType, @Test public void testDate() throws HiveException { for (PrimitiveCategory colType : dateTimestampStringTypes) { + if (colType == PrimitiveCategory.DATE) { + continue; + } LongColumnVector date = newRandomLongColumnVector(10000, size); LongColumnVector output = new LongColumnVector(size); @@ -722,7 +725,7 @@ private void validateToDate(VectorizedRowBatch batch, PrimitiveCategory colType, } else if (colType == PrimitiveCategory.TIMESTAMP) { udf = new CastTimestampToDate(0, 1); } else { - udf = new CastLongToDate(0, 1); + throw new RuntimeException("Unexpected column type " + colType); } udf.setInputTypeInfos(new TypeInfo[] {primitiveCategoryToTypeInfo(colType)}); udf.transientInit(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java index 58e32ca..666d26c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorIfStatement.java @@ -247,10 +247,10 @@ private void doIfTestsWithDiffColumnScalar(Random random, String typeName, explicitDataTypePhysicalVariationList); List columns = new ArrayList(); - columns.add("col0"); // The boolean predicate. + columns.add("col1"); // The boolean predicate. - ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Boolean.class, "col0", "table", false); - int columnNum = 1; + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Boolean.class, "col1", "table", false); + int columnNum = 2; ExprNodeDesc col2Expr; if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java index 54c085b..ea39848 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNegative.java @@ -195,7 +195,7 @@ private void doTests(Random random, TypeInfo typeInfo) new ArrayList(); List columns = new ArrayList(); - int columnNum = 0; + int columnNum = 1; generationSpecList.add( GenerationSpec.createSameType(typeInfo)); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java new file mode 100644 index 0000000..a4fc0d57 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorNull.java @@ -0,0 +1,513 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.SupportedTypes; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.TestVectorArithmetic.ColumnScalarMode; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import 
org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.io.WritableComparable; + +import junit.framework.Assert; + +import org.junit.Ignore; +import org.junit.Test; + +public class TestVectorNull { + + @Test + public void testIsNull() throws Exception { + Random random = new Random(5371); + + doNull(random, "isnull"); + } + + @Test + public void testIsNotNull() throws Exception { + Random random = new Random(2772); + + doNull(random, "isnotnull"); + } + + @Test + public void testNot() throws Exception { + Random random = new Random(2772); + + doNull(random, "not"); + } + + public enum NullTestMode { + ROW_MODE, + ADAPTOR, + VECTOR_EXPRESSION; + + static final int count = values().length; + } + + private void doNull(Random random, String functionName) + throws Exception { + + // Several different random types... + doIsNullOnRandomDataType(random, functionName, true); + doIsNullOnRandomDataType(random, functionName, true); + doIsNullOnRandomDataType(random, functionName, true); + + doIsNullOnRandomDataType(random, functionName, false); + doIsNullOnRandomDataType(random, functionName, false); + doIsNullOnRandomDataType(random, functionName, false); + } + + private boolean doIsNullOnRandomDataType(Random random, String functionName, boolean isFilter) + throws Exception { + + String typeName; + if (functionName.equals("not")) { + typeName = "boolean"; + } else { + typeName = + VectorRandomRowSource.getRandomTypeName( + random, SupportedTypes.ALL, /* allowedTypeNameSet */ null); + typeName = + VectorRandomRowSource.getDecoratedTypeName( + random, typeName, SupportedTypes.ALL, /* allowedTypeNameSet */ null, + /* depth */ 0, /* maxDepth */ 2); + } + + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + + //---------------------------------------------------------------------------------------------- + + ObjectInspector objectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + typeInfo); + + //---------------------------------------------------------------------------------------------- + + GenerationSpec generationSpec = GenerationSpec.createSameType(typeInfo); + + List generationSpecList = new ArrayList(); + List explicitDataTypePhysicalVariationList = + new ArrayList(); + generationSpecList.add(generationSpec); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + explicitDataTypePhysicalVariationList); + + List columns = new ArrayList(); + columns.add("col1"); + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col1", "table", false); + + List children = new ArrayList(); + children.add(col1Expr); + + String[] columnNames = columns.toArray(new String[0]); + + Object[][] randomRows = rowSource.randomRows(100000); + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + rowSource, + randomRows, + null); + + final GenericUDF udf; + final 
ObjectInspector outputObjectInspector;
+    switch (functionName) {
+    case "isnull":
+      udf = new GenericUDFOPNull();
+      break;
+    case "isnotnull":
+      udf = new GenericUDFOPNotNull();
+      break;
+    case "not":
+      udf = new GenericUDFOPNot();
+      break;
+    default:
+      throw new RuntimeException("Unexpected function name " + functionName);
+    }
+
+    ObjectInspector[] argumentOIs = new ObjectInspector[] { objectInspector };
+    outputObjectInspector = udf.initialize(argumentOIs);
+
+    TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
+
+    ExprNodeGenericFuncDesc exprDesc =
+        new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.booleanTypeInfo, udf, children);
+
+    final int rowCount = randomRows.length;
+    Object[][] resultObjectsArray = new Object[NullTestMode.count][];
+    for (int i = 0; i < NullTestMode.count; i++) {
+
+      Object[] resultObjects = new Object[rowCount];
+      resultObjectsArray[i] = resultObjects;
+
+      NullTestMode nullTestMode = NullTestMode.values()[i];
+      switch (nullTestMode) {
+      case ROW_MODE:
+        if (!doRowCastTest(
+            typeInfo,
+            isFilter,
+            columns,
+            children,
+            udf, exprDesc,
+            randomRows,
+            rowSource.rowStructObjectInspector(),
+            resultObjects)) {
+          return false;
+        }
+        break;
+      case ADAPTOR:
+      case VECTOR_EXPRESSION:
+        if (!doVectorCastTest(
+            typeInfo,
+            isFilter,
+            columns,
+            columnNames,
+            rowSource.typeInfos(),
+            rowSource.dataTypePhysicalVariations(),
+            children,
+            udf, exprDesc,
+            nullTestMode,
+            batchSource,
+            exprDesc.getWritableObjectInspector(),
+            outputTypeInfo,
+            resultObjects)) {
+          return false;
+        }
+        break;
+      default:
+        throw new RuntimeException("Unexpected NULL test mode " + nullTestMode);
+      }
+    }
+
+    for (int i = 0; i < rowCount; i++) {
+      // Row-mode is the expected value.
+      Object expectedResult = resultObjectsArray[0][i];
+
+      for (int v = 1; v < NullTestMode.count; v++) {
+        Object vectorResult = resultObjectsArray[v][i];
+        NullTestMode nullTestMode = NullTestMode.values()[v];
+        if (isFilter &&
+            expectedResult == null &&
+            vectorResult != null) {
+          // This is OK: a filter may simply drop a NULL row. It must not keep it, though.
+          boolean vectorBoolean = ((BooleanWritable) vectorResult).get();
+          if (vectorBoolean) {
+            Assert.fail(
+                "Row " + i +
+                " typeName " + typeName +
+                " outputTypeName " + outputTypeInfo.getTypeName() +
+                " isFilter " + isFilter +
+                " " + nullTestMode +
+                " result is NOT NULL and true" +
+                " does not match row-mode expected result is NULL which means false here" +
+                " row values " + Arrays.toString(randomRows[i]) +
+                " exprDesc " + exprDesc.toString());
+          }
+        } else if (expectedResult == null || vectorResult == null) {
+          if (expectedResult != null || vectorResult != null) {
+            Assert.fail(
+                "Row " + i +
+                " sourceTypeName " + typeName +
+                " isFilter " + isFilter +
+                " " + nullTestMode +
+                " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) +
+                " does not match row-mode expected result is NULL " +
+                (expectedResult == null ?
"YES" : "NO result " + expectedResult.toString()) + + " row values " + Arrays.toString(randomRows[i]) + + " exprDesc " + exprDesc.toString()); + } + } else { + + if (!expectedResult.equals(vectorResult)) { + Assert.fail( + "Row " + i + + " sourceTypeName " + typeName + + " isFilter " + isFilter + + " " + nullTestMode + + " result " + vectorResult.toString() + + " (" + vectorResult.getClass().getSimpleName() + ")" + + " does not match row-mode expected result " + expectedResult.toString() + + " (" + expectedResult.getClass().getSimpleName() + ")" + + " row values " + Arrays.toString(randomRows[i]) + + " exprDesc " + exprDesc.toString()); + } + } + } + } + return true; + } + + private boolean doRowCastTest(TypeInfo typeInfo, boolean isFilter, + List columns, List children, + GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, + Object[][] randomRows, + ObjectInspector rowInspector, Object[] resultObjects) + throws Exception { + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " targetTypeInfo " + targetTypeInfo + + " nullTestMode ROW_MODE" + + " exprDesc " + exprDesc.toString()); + */ + + HiveConf hiveConf = new HiveConf(); + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); + try { + evaluator.initialize(rowInspector); + } catch (HiveException e) { + return false; + } + + final int rowCount = randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = randomRows[i]; + Object result = evaluator.evaluate(row); + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + result, PrimitiveObjectInspectorFactory.writableBooleanObjectInspector, + ObjectInspectorCopyOption.WRITABLE); + resultObjects[i] = copyResult; + } + + return true; + } + + private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, + VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, + ObjectInspector objectInspector, Object[] resultObjects) { + + boolean selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); + + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[rowIndex++] = copyResult; + } + } + + private boolean doVectorCastTest(TypeInfo typeInfo, boolean isFilter, + List columns, String[] columnNames, + TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, + List children, + GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, + NullTestMode nullTestMode, + VectorRandomBatchSource batchSource, + ObjectInspector objectInspector, + TypeInfo outputTypeInfo, Object[] resultObjects) + throws Exception { + + HiveConf hiveConf = new HiveConf(); + if (nullTestMode == NullTestMode.ADAPTOR) { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); + } + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = + vectorizationContext.getVectorExpression(exprDesc, + (isFilter ? 
+                VectorExpressionDescriptor.Mode.FILTER :
+                VectorExpressionDescriptor.Mode.PROJECTION));
+    vectorExpression.transientInit();
+
+    if (nullTestMode == NullTestMode.VECTOR_EXPRESSION &&
+        vectorExpression instanceof VectorUDFAdaptor) {
+      System.out.println(
+          "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() +
+          " nullTestMode " + nullTestMode +
+          " isFilter " + isFilter +
+          " vectorExpression " + vectorExpression.toString());
+    }
+
+    // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
+
+    /*
+    System.out.println(
+        "*DEBUG* typeInfo " + typeInfo.toString() +
+        " nullTestMode " + nullTestMode +
+        " isFilter " + isFilter +
+        " vectorExpression " + vectorExpression.toString());
+    */
+
+    VectorRandomRowSource rowSource = batchSource.getRowSource();
+    VectorizedRowBatchCtx batchContext =
+        new VectorizedRowBatchCtx(
+            columnNames,
+            rowSource.typeInfos(),
+            rowSource.dataTypePhysicalVariations(),
+            /* dataColumnNums */ null,
+            /* partitionColumnCount */ 0,
+            /* virtualColumnCount */ 0,
+            /* neededVirtualColumns */ null,
+            vectorizationContext.getScratchColumnTypeNames(),
+            vectorizationContext.getScratchDataTypePhysicalVariations());
+
+    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
+
+    VectorExtractRow resultVectorExtractRow = null;
+    Object[] scrqtchRow = null;
+    if (!isFilter) {
+      resultVectorExtractRow = new VectorExtractRow();
+      final int outputColumnNum = vectorExpression.getOutputColumnNum();
+      resultVectorExtractRow.init(
+          new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum });
+      scrqtchRow = new Object[1];
+    }
+
+    boolean copySelectedInUse = false;
+    int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+
+    batchSource.resetBatchIteration();
+    int rowIndex = 0;
+    while (true) {
+      if (!batchSource.fillNextBatch(batch)) {
+        break;
+      }
+      final int originalBatchSize = batch.size;
+      if (isFilter) {
+        copySelectedInUse = batch.selectedInUse;
+        if (batch.selectedInUse) {
+          System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize);
+        }
+      }
+
+      // In filter mode, the batch size can be made smaller.
+      vectorExpression.evaluate(batch);
+
+      if (!isFilter) {
+        extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow,
+            objectInspector, resultObjects);
+      } else {
+        final int currentBatchSize = batch.size;
+        if (copySelectedInUse && batch.selectedInUse) {
+          int selectIndex = 0;
+          for (int i = 0; i < originalBatchSize; i++) {
+            final int originalBatchIndex = copySelected[i];
+            final boolean booleanResult;
+            if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) {
+              booleanResult = true;
+              selectIndex++;
+            } else {
+              booleanResult = false;
+            }
+            resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
+          }
+        } else if (batch.selectedInUse) {
+          int selectIndex = 0;
+          for (int i = 0; i < originalBatchSize; i++) {
+            final boolean booleanResult;
+            if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) {
+              booleanResult = true;
+              selectIndex++;
+            } else {
+              booleanResult = false;
+            }
+            resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
+          }
+        } else if (currentBatchSize == 0) {
+          // Whole batch got zapped.
+          for (int i = 0; i < originalBatchSize; i++) {
+            resultObjects[rowIndex + i] = new BooleanWritable(false);
+          }
+        } else {
+          // Every row kept.
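+        /*
+         * Illustrative sketch of this filter-result reconstruction: if a batch
+         * held rows 0..4 and the filter kept selected = [1, 3] with
+         * selectedInUse true, the per-row results recorded for the batch are
+         *
+         *   [false, true, false, true, false]
+         *
+         * whereas a filter that keeps every row leaves selectedInUse false and
+         * this branch records true for all originalBatchSize rows.
+         */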
+ for (int i = 0; i < originalBatchSize; i++) { + resultObjects[rowIndex + i] = new BooleanWritable(true); + } + } + } + + rowIndex += originalBatchSize; + } + + return true; + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java index 69fd70c..8877b06 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java @@ -145,7 +145,7 @@ private void doStringConcatTestsWithDiffColumnScalar(Random random, new ArrayList(); List columns = new ArrayList(); - int columnNum = 0; + int columnNum = 1; ExprNodeDesc col1Expr; if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java index f029358..dd53157 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringUnary.java @@ -133,7 +133,7 @@ private void doTests(Random random, String typeName, String functionName) new ArrayList(); List columns = new ArrayList(); - int columnNum = 0; + int columnNum = 1; ExprNodeDesc col1Expr; StringGenerationOption stringGenerationOption = new StringGenerationOption(true, true); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java index 694f6f7..a978782 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorSubStr.java @@ -102,7 +102,7 @@ private void doTests(Random random, boolean useLength) new ArrayList(); List columns = new ArrayList(); - int columnNum = 0; + int columnNum = 1; ExprNodeDesc col1Expr; StringGenerationOption stringGenerationOption = new StringGenerationOption(true, true); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java index 5d5e4c9..c31bec5 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExtract.java @@ -118,7 +118,7 @@ private void doIfTestOneTimestampExtract(Random random, String dateTimeStringTyp new ArrayList(); List columns = new ArrayList(); - int columnNum = 0; + int columnNum = 1; ExprNodeDesc col1Expr; if (!isStringFamily) { generationSpecList.add( diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 12ae103..801dda3 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -1390,7 +1390,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(cdate BETWEEN DATE'1969-12-30' AND DATE'1970-01-02') -> 5:boolean + selectExpressions: LongColumnBetween(col 3:date, left -2, right 1) -> 
5:boolean Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1425,7 +1425,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -1528,7 +1528,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 5:boolean + selectExpressions: DecimalColumnNotBetween(col 1:decimal(20,10), left -2000, right 4390.1351351351) -> 5:boolean Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1563,7 +1563,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index 966f6c5..d2b1fa4 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -408,7 +408,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 3, 10, 12, 13, 14, 11, 7, 16, 23, 2] - selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 
13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date + selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, 
ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) @@ -693,7 +693,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 3, 15, 26, 36, 40, 42, 44, 46, 53, 2] - selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) 
-> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date + selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 
3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, 
TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out index 1908418..15cd648 100644 --- ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out @@ -562,8 +562,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - selectExpressions: VectorUDFUnixTimeStampDate(col 0) -> 3:bigint, VectorUDFYearDate(col 0, field YEAR) -> 4:int, VectorUDFMonthDate(col 0, field MONTH) -> 5:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 7:int, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 8:int, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 9:int, VectorUDFDateLong(col 0:date) -> 10:date, VectorUDFDateAddColScalar(col 0:date, val 2) -> 11:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 12:date, VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 13:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 14:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 15:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 16:int, VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 17:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 18:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 19:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 20:int + projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 0, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + selectExpressions: VectorUDFUnixTimeStampDate(col 0) -> 3:bigint, VectorUDFYearDate(col 0, field YEAR) -> 4:int, VectorUDFMonthDate(col 0, field MONTH) -> 5:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 7:int, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 8:int, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 9:int, VectorUDFDateAddColScalar(col 0:date, val 2) -> 10:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 11:date, VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 12:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 13:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 14:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 15:int, VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 16:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 17:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 18:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 19:int Statistics: Num rows: 137 Data size: 7392 Basic stats: COMPLETE Column 
stats: NONE File Output Operator compressed: false @@ -858,8 +858,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 0, 5, 6, 7, 8, 9, 10, 4, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - selectExpressions: LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 3:int, VectorUDFYearDate(col 0, field YEAR) -> 4:int) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 3:int, VectorUDFMonthDate(col 0, field MONTH) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfWeekTimestamp(col 1:timestamp, field DAY_OF_WEEK) -> 3:int, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:date, col 0:date)(children: CastTimestampToDate(col 1:timestamp) -> 3:date) -> 4:boolean, LongColEqualLongColumn(col 3:date, col 11:date)(children: VectorUDFDateTimestamp(col 1:timestamp) -> 3:date, VectorUDFDateLong(col 0:date) -> 11:date) -> 12:boolean, LongColEqualLongColumn(col 3:date, col 11:date)(children: VectorUDFDateAddColScalar(col 1:timestamp, val 2) -> 3:date, VectorUDFDateAddColScalar(col 0:date, val 2) -> 11:date) -> 13:boolean, LongColEqualLongColumn(col 3:date, col 11:date)(children: VectorUDFDateSubColScalar(col 1:timestamp, val 2) -> 3:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 11:date) -> 14:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val 2000-01-01) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 11:int) -> 15:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 16:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 17:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 18:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val 2007-03-14) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 11:int) -> 19:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 20:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 21:boolean, LongColEqualLongColumn(col 3:int, 
col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 22:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 23:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 24:boolean + projectedOutputColumnNums: [1, 0, 5, 6, 7, 8, 9, 10, 4, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + selectExpressions: LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 3:int, VectorUDFYearDate(col 0, field YEAR) -> 4:int) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 3:int, VectorUDFMonthDate(col 0, field MONTH) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfWeekTimestamp(col 1:timestamp, field DAY_OF_WEEK) -> 3:int, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:date, col 0:date)(children: CastTimestampToDate(col 1:timestamp) -> 3:date) -> 4:boolean, LongColEqualLongColumn(col 3:date, col 0:date)(children: VectorUDFDateTimestamp(col 1:timestamp) -> 3:date, col 0:date) -> 11:boolean, LongColEqualLongColumn(col 3:date, col 12:date)(children: VectorUDFDateAddColScalar(col 1:timestamp, val 2) -> 3:date, VectorUDFDateAddColScalar(col 0:date, val 2) -> 12:date) -> 13:boolean, LongColEqualLongColumn(col 3:date, col 12:date)(children: VectorUDFDateSubColScalar(col 1:timestamp, val 2) -> 3:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 12:date) -> 14:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val 2000-01-01) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 12:int) -> 15:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 16:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 17:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 18:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val 2007-03-14) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 12:int) -> 19:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: 
VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 20:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 21:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 22:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 23:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 24:boolean Statistics: Num rows: 137 Data size: 12672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1126,8 +1126,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 5, 6, 7, 9] - selectExpressions: VectorUDFDateLong(col 3:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 3:date) -> 4:date, VectorUDFDateLong(col 3:date)(children: VectorUDFDateSubColScalar(col 0:date, val 2) -> 3:date) -> 5:date, VectorUDFDateDiffColCol(col 0:date, col 3:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 3:date) -> 6:int, VectorUDFDateDiffColCol(col 0:date, col 3:date)(children: VectorUDFDateSubColScalar(col 0:date, val 2) -> 3:date) -> 7:int, VectorUDFDateDiffColCol(col 3:date, col 8:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 3:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 8:date) -> 9:int + projectedOutputColumnNums: [0, 3, 4, 6, 7, 9] + selectExpressions: VectorUDFDateAddColScalar(col 0:date, val 2) -> 3:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 4:date, VectorUDFDateDiffColCol(col 0:date, col 5:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 5:date) -> 6:int, VectorUDFDateDiffColCol(col 0:date, col 5:date)(children: VectorUDFDateSubColScalar(col 0:date, val 2) -> 5:date) -> 7:int, VectorUDFDateDiffColCol(col 5:date, col 8:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 5:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 8:date) -> 9:int Statistics: Num rows: 137 Data size: 7392 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index 4edd0e4..9033b88 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -1310,7 +1310,7 @@ STAGE PLANS: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: Group By Operator @@ -1332,7 +1332,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5, 7, 8, 11, 6, 12, 13, 14] - selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 0)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 4:double) -> 5:double, VectorUDFAdaptor(((_col2 - ((_col3 
* _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 7:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 8:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D)(children: DoubleColDivideLongColumn(col 4:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 6:double) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 14:double) -> 4:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double) -> 4:double) 
-> 14:double + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 0)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 4:double) -> 5:double, DoubleColumnBetween(col 6:double, left 8.97077295279421E19, right 8.97077295279422E19)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 7:boolean, DoubleColumnBetween(col 6:double, left 8.97077295279421E19, right 8.97077295279422E19)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 8:boolean, DoubleColumnBetween(col 6:double, left 9.20684592523616E19, right 9.20684592523617E19)(children: DoubleColDivideLongColumn(col 4:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 6:double) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 14:double) -> 4:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 
1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double) -> 4:double) -> 14:double Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index 78bcd26..8b1a2be 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -1369,7 +1369,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(cdate BETWEEN DATE'1969-12-30' AND DATE'1970-01-02') -> 5:boolean + selectExpressions: LongColumnBetween(col 3:date, left -2, right 1) -> 5:boolean Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1403,7 +1403,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized @@ -1505,7 +1505,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 5:boolean + selectExpressions: DecimalColumnNotBetween(col 1:decimal(20,10), left -2000, right 4390.1351351351) -> 5:boolean Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1539,7 +1539,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index f094fba..3944542 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -1296,7 +1296,7 @@ STAGE PLANS: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: Group By Operator @@ -1318,7 +1318,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5, 7, 8, 11, 6, 12, 13, 14] - selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 0)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 4:double) -> 5:double, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 7:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 
2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 8:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D)(children: DoubleColDivideLongColumn(col 4:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 6:double) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 14:double) -> 4:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double) -> 4:double) -> 14:double + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 0)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 4:double) -> 5:double, DoubleColumnBetween(col 6:double, left 8.97077295279421E19, right 8.97077295279422E19)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 7:boolean, DoubleColumnBetween(col 6:double, left 
8.97077295279421E19, right 8.97077295279422E19)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 8:boolean, DoubleColumnBetween(col 6:double, left 9.20684592523616E19, right 9.20684592523617E19)(children: DoubleColDivideLongColumn(col 4:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 6:double) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 14:double) -> 4:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double) -> 4:double) -> 14:double Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/vector_case_when_2.q.out ql/src/test/results/clientpositive/vector_case_when_2.q.out index 9ff8750..342c518 100644 --- ql/src/test/results/clientpositive/vector_case_when_2.q.out +++ ql/src/test/results/clientpositive/vector_case_when_2.q.out @@ -376,7 +376,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 3, 10, 12, 13, 14, 11, 7, 16, 23, 2] - 
selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 
21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date + selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: 
TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date Statistics: Num rows: 51 Data size: 12300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) @@ -635,7 +635,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 3, 15, 26, 36, 40, 42, 44, 46, 53, 2] - selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: VectorUDFAdaptor(ctimestamp2 
BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date + selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 
3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date Statistics: Num rows: 51 Data size: 12300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index a2e0fdd..d857cb0 100644 --- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -555,8 +555,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - selectExpressions: VectorUDFUnixTimeStampDate(col 0) -> 3:bigint, VectorUDFYearDate(col 0, field YEAR) -> 4:int, VectorUDFMonthDate(col 0, field MONTH) -> 5:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 7:int, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 
8:int, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 9:int, VectorUDFDateLong(col 0:date) -> 10:date, VectorUDFDateAddColScalar(col 0:date, val 2) -> 11:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 12:date, VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 13:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 14:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 15:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 16:int, VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 17:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 18:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 19:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 20:int + projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 0, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + selectExpressions: VectorUDFUnixTimeStampDate(col 0) -> 3:bigint, VectorUDFYearDate(col 0, field YEAR) -> 4:int, VectorUDFMonthDate(col 0, field MONTH) -> 5:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 7:int, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 8:int, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 9:int, VectorUDFDateAddColScalar(col 0:date, val 2) -> 10:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 11:date, VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 12:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 13:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 14:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 15:int, VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 16:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 17:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 18:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 19:int Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -847,8 +847,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 0, 5, 6, 7, 8, 9, 10, 4, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - selectExpressions: LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 3:int, VectorUDFYearDate(col 0, field YEAR) -> 4:int) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 3:int, VectorUDFMonthDate(col 0, field MONTH) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfWeekTimestamp(col 1:timestamp, field DAY_OF_WEEK) -> 3:int, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:date, col 0:date)(children: CastTimestampToDate(col 1:timestamp) -> 3:date) -> 4:boolean, LongColEqualLongColumn(col 3:date, col 11:date)(children: 
VectorUDFDateTimestamp(col 1:timestamp) -> 3:date, VectorUDFDateLong(col 0:date) -> 11:date) -> 12:boolean, LongColEqualLongColumn(col 3:date, col 11:date)(children: VectorUDFDateAddColScalar(col 1:timestamp, val 2) -> 3:date, VectorUDFDateAddColScalar(col 0:date, val 2) -> 11:date) -> 13:boolean, LongColEqualLongColumn(col 3:date, col 11:date)(children: VectorUDFDateSubColScalar(col 1:timestamp, val 2) -> 3:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 11:date) -> 14:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val 2000-01-01) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 11:int) -> 15:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 16:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 17:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 18:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val 2007-03-14) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 11:int) -> 19:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 20:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 21:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 22:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 23:boolean, LongColEqualLongColumn(col 3:int, col 11:int)(children: VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 11:int) -> 24:boolean + projectedOutputColumnNums: [1, 0, 5, 6, 7, 8, 9, 10, 4, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + selectExpressions: LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 3:int, VectorUDFYearDate(col 0, field YEAR) -> 4:int) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 3:int, VectorUDFMonthDate(col 0, field MONTH) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfWeekTimestamp(col 1:timestamp, field DAY_OF_WEEK) -> 3:int, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 
4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:date, col 0:date)(children: CastTimestampToDate(col 1:timestamp) -> 3:date) -> 4:boolean, LongColEqualLongColumn(col 3:date, col 0:date)(children: VectorUDFDateTimestamp(col 1:timestamp) -> 3:date, col 0:date) -> 11:boolean, LongColEqualLongColumn(col 3:date, col 12:date)(children: VectorUDFDateAddColScalar(col 1:timestamp, val 2) -> 3:date, VectorUDFDateAddColScalar(col 0:date, val 2) -> 12:date) -> 13:boolean, LongColEqualLongColumn(col 3:date, col 12:date)(children: VectorUDFDateSubColScalar(col 1:timestamp, val 2) -> 3:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 12:date) -> 14:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val 2000-01-01) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 12:int) -> 15:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 16:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 17:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 18:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val 2007-03-14) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 12:int) -> 19:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 20:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 21:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 1:timestamp, val NULL) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 22:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 0:date, val 2000-01-01) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 23:boolean, LongColEqualLongColumn(col 3:int, col 12:int)(children: VectorUDFDateDiffColScalar(col 0:date, val 2007-03-14) -> 3:int, VectorUDFDateDiffColScalar(col 0:date, val NULL) -> 12:int) -> 24:boolean Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1111,8 +1111,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 5, 6, 7, 9] - selectExpressions: VectorUDFDateLong(col 3:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 3:date) -> 4:date, VectorUDFDateLong(col 3:date)(children: VectorUDFDateSubColScalar(col 0:date, val 2) -> 3:date) -> 5:date, VectorUDFDateDiffColCol(col 0:date, col 3:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 3:date) -> 6:int, VectorUDFDateDiffColCol(col 0:date, col 3:date)(children: 
VectorUDFDateSubColScalar(col 0:date, val 2) -> 3:date) -> 7:int, VectorUDFDateDiffColCol(col 3:date, col 8:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 3:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 8:date) -> 9:int + projectedOutputColumnNums: [0, 3, 4, 6, 7, 9] + selectExpressions: VectorUDFDateAddColScalar(col 0:date, val 2) -> 3:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 4:date, VectorUDFDateDiffColCol(col 0:date, col 5:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 5:date) -> 6:int, VectorUDFDateDiffColCol(col 0:date, col 5:date)(children: VectorUDFDateSubColScalar(col 0:date, val 2) -> 5:date) -> 7:int, VectorUDFDateDiffColCol(col 5:date, col 8:date)(children: VectorUDFDateAddColScalar(col 0:date, val 2) -> 5:date, VectorUDFDateSubColScalar(col 0:date, val 2) -> 8:date) -> 9:int Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index 666572a..0e147be 100644 --- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -940,6 +940,29 @@ {"FilterColumnBetweenDynamicValue", "date", ""}, {"FilterColumnBetweenDynamicValue", "timestamp", ""}, + {"ColumnBetween", "long", ""}, + {"ColumnBetween", "double", ""}, + {"ColumnBetween", "long", "!"}, + {"ColumnBetween", "double", "!"}, + + {"StringColumnBetween", "string", ""}, + {"StringColumnBetween", "string", "!"}, + + {"TruncStringColumnBetween", "char", ""}, + {"TruncStringColumnBetween", "char", "!"}, + + {"TruncStringColumnBetween", "varchar", ""}, + {"TruncStringColumnBetween", "varchar", "!"}, + + {"TimestampColumnBetween", "timestamp", ""}, + {"TimestampColumnBetween", "timestamp", "!"}, + + {"DecimalColumnBetween", "decimal", ""}, + {"DecimalColumnBetween", "decimal", "!"}, + + {"Decimal64ColumnBetween", ""}, + {"Decimal64ColumnBetween", "!"}, + {"ColumnCompareColumn", "Equal", "long", "long", "=="}, {"ColumnCompareColumn", "Equal", "long", "double", "=="}, {"ColumnCompareColumn", "Equal", "double", "double", "=="}, @@ -1368,6 +1391,15 @@ private void generate() throws Exception { generateFilterColumnBetween(tdesc); } else if (tdesc[0].equals("FilterColumnBetweenDynamicValue")) { generateFilterColumnBetweenDynamicValue(tdesc); + } else if (tdesc[0].equals("ColumnBetween") || + tdesc[0].equals("StringColumnBetween") || + tdesc[0].equals("TimestampColumnBetween") || + tdesc[0].equals("DecimalColumnBetween")) { + generateColumnBetween(tdesc); + } else if (tdesc[0].equals("TruncStringColumnBetween")) { + generateTruncStringColumnBetween(tdesc); + } else if (tdesc[0].equals("Decimal64ColumnBetween")) { + generateDecimal64ColumnBetween(tdesc); } else if (tdesc[0].equals("ScalarArithmeticColumn") || tdesc[0].equals("ScalarDivideColumn")) { generateScalarArithmeticColumn(tdesc); } else if (tdesc[0].equals("FilterColumnCompareColumn")) { @@ -1693,6 +1725,63 @@ private void generateFilterColumnBetweenDynamicValue(String[] tdesc) throws Exce className, templateString); } + private void generateColumnBetween(String[] tdesc) throws Exception { + String operandType = tdesc[1]; + String optionalNot = tdesc[2]; + + String className = getCamelCaseType(operandType) + "Column" + + (optionalNot.equals("!") ? 
"Not" : "") + "Between"; + String inputColumnVectorType = getColumnVectorType(operandType); + + // Read the template into a string, expand it, and write it. + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("", className); + templateString = templateString.replaceAll("", inputColumnVectorType); + templateString = templateString.replaceAll("", operandType); + templateString = templateString.replaceAll("", optionalNot); + + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateTruncStringColumnBetween(String[] tdesc) throws Exception { + String operandType = tdesc[1]; + String optionalNot = tdesc[2]; + + String className = getCamelCaseType(operandType) + "Column" + + (optionalNot.equals("!") ? "Not" : "") + "Between"; + String baseClassName = "StringColumn" + + (optionalNot.equals("!") ? "Not" : "") + "Between"; + + // Read the template into a string, expand it, and write it. + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("", className); + templateString = templateString.replaceAll("", baseClassName); + + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateDecimal64ColumnBetween(String[] tdesc) throws Exception { + String optionalNot = tdesc[1]; + + String className = "Decimal64Column" + + (optionalNot.equals("!") ? "Not" : "") + "Between"; + String baseClassName = "LongColumn" + + (optionalNot.equals("!") ? "Not" : "") + "Between"; + + // Read the template into a string, expand it, and write it. + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("", className); + templateString = templateString.replaceAll("", baseClassName); + + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + private void generateColumnCompareColumn(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2];