diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt new file mode 100644 index 0000000..1aa398a --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnBetween.txt @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.util.Arrays; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * Output a boolean value indicating if a column is [NOT] BETWEEN two constants. 
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + protected int colNum; + + protected final leftValue; + protected final rightValue; + + public (int colNum, leftValue, rightValue, int outputColumnNum) { + super(outputColumnNum); + this.colNum = colNum; + this.leftValue = leftValue; + this.rightValue = rightValue; + } + + public () { + super(); + + // Dummy final assignments. + colNum = -1; + leftValue = 0; + rightValue = 0; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + inputColVector = () batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + int n = batch.size; + [] vector = inputColVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + final repeatValue = vector[0]; + outputVector[0] = (repeatValue < leftValue || repeatValue > rightValue) ? 0 : 1; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + final value = vector[i]; + outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + final value = vector[i]; + outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + final value = vector[i]; + outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1; + } + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; + final value = vector[i]; + outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1; + } + } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + for(int i = 0; i != n; i++) { + final value = vector[i]; + outputVector[i] = (value < leftValue || value > rightValue) ? 0 : 1; + } + } + } + } + + @Override + public String vectorExpressionParameters() { + return getColumnParamString(0, colNum) + ", left " + leftValue + ", right " + rightValue; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + + // return null since this will be handled as a special case in VectorizationContext + return null; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt new file mode 100644 index 0000000..1763cbd --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnBetween.txt @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +public class extends { + private static final long serialVersionUID = 1L; + + public () { + super(); + } + + public (int colNum, long leftValue, long rightValue, int outputColumnNum) { + super(colNum, leftValue, rightValue, outputColumnNum); + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) inputTypeInfos[1]; + HiveDecimalWritable writable1 = new HiveDecimalWritable(); + writable1.deserialize64(leftValue, decimalTypeInfo1.scale()); + + DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) inputTypeInfos[2]; + HiveDecimalWritable writable2 = new HiveDecimalWritable(); + writable2.deserialize64(rightValue, decimalTypeInfo2.scale()); + return + getColumnParamString(0, colNum) + + ", decimal64Left " + leftValue + ", decimalLeft " + writable1.toString() + + ", decimal64Right " + rightValue + ", decimalRight " + writable2.toString(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt 
ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt new file mode 100644 index 0000000..6fd1301 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnBetween.txt @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; + +import java.util.Arrays; +import java.util.HashSet; + +/** + * Output a boolean value indicating if a column is IN a list of constants. 
+ */ +public class extends VectorExpression { + private static final long serialVersionUID = 1L; + + private final int inputColumn; + + protected final HiveDecimal leftValue; + protected final HiveDecimal rightValue; + + public () { + super(); + + // Dummy final assignments. + inputColumn = -1; + leftValue = null; + rightValue = null; + } + + /** + * After construction you must call setInListValues() to add the values to the IN set. + */ + public (int colNum, HiveDecimal leftValue, HiveDecimal rightValue, + int outputColumnNum) { + super(outputColumnNum); + this.inputColumn = colNum; + this.leftValue = leftValue; + this.rightValue = rightValue; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inputColumnVector = (DecimalColumnVector) batch.cols[inputColumn]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColumnVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + int n = batch.size; + HiveDecimalWritable[] vector = inputColumnVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColumnVector.isRepeating) { + if (inputColumnVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + final HiveDecimalWritable repeatValue = vector[0]; + outputVector[0] = + (DecimalUtil.compare(repeatValue, leftValue) < 0 || + DecimalUtil.compare(repeatValue, rightValue) > 0) ? 
0 : 1; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColumnVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + final HiveDecimalWritable value = vector[i]; + outputVector[i] = + (DecimalUtil.compare(value, leftValue) < 0 || + DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + final HiveDecimalWritable value = vector[i]; + outputVector[i] = + (DecimalUtil.compare(value, leftValue) < 0 || + DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + final HiveDecimalWritable value = vector[i]; + outputVector[i] = + (DecimalUtil.compare(value, leftValue) < 0 || + DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1; + } + } + } else /* there are NULLs in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + final HiveDecimalWritable value = vector[i]; + outputVector[i] = + (DecimalUtil.compare(value, leftValue) < 0 || + DecimalUtil.compare(value, rightValue) > 0) ? 
0 : 1; + } + } + } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inputIsNull[i]) { + final HiveDecimalWritable value = vector[i]; + outputVector[i] = + (DecimalUtil.compare(value, leftValue) < 0 || + DecimalUtil.compare(value, rightValue) > 0) ? 0 : 1; + } + } + } + } + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. + return null; + } + + @Override + public String vectorExpressionParameters() { + return + getColumnParamString(0, inputColumn) + + ", left " + leftValue.toString() + ", right " + rightValue.toString(); + } + +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt index 0664cbf..47dd42f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt @@ -34,12 +34,12 @@ public class extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; + protected final int colNum; // The comparison is of the form "column BETWEEN leftValue AND rightValue". // NOTE: These can be set later by FilterColumnBetweenDynamicValue.txt so they are not final. - private leftValue; - private rightValue; + protected leftValue; + protected rightValue; public (int colNum, leftValue, rightValue) { super(); diff --git ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt new file mode 100644 index 0000000..798cb95 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/StringColumnBetween.txt @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +import java.util.Arrays; + +public class extends VectorExpression { + private static final long serialVersionUID = 1L; + + protected final int inputCol; + + private final byte[] left; + private final byte[] right; + + public () { + super(); + + // Dummy final assignments. 
+ inputCol = -1; + left = null; + right = null; + } + + public (int colNum, byte[] left, byte[] right, int outputColumnNum) { + super(outputColumnNum); + this.inputCol = colNum; + this.left = left; + this.right = right; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + int n = batch.size; + byte[][] vector = inputColVector.vector; + int[] start = inputColVector.start; + int[] length = inputColVector.length; + long[] outputVector = outputColVector.vector; + boolean[] outputIsNull = outputColVector.isNull; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + final byte[] repeatBytes = vector[0]; + final int repeatStart = start[0]; + final int repeatLength = length[0]; + outputVector[0] = + (StringExpr.compare(repeatBytes, repeatStart, repeatLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, repeatBytes, repeatStart, repeatLength) < 0) ? 0 : 1; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + final byte[] valueBytes = vector[i]; + final int valueStart = start[i]; + final int valueLength = length[i]; + outputVector[i] = + (StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + final byte[] valueBytes = vector[i]; + final int valueStart = start[i]; + final int valueLength = length[i]; + outputVector[i] = + (StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + final byte[] valueBytes = vector[i]; + final int valueStart = start[i]; + final int valueLength = length[i]; + outputVector[i] = + (StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1; + } + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + final byte[] valueBytes = vector[i]; + final int valueStart = start[i]; + final int valueLength = length[i]; + outputVector[i] = + (StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 
0 : 1; + } + } + } else { + System.arraycopy(inputIsNull, 0, outputColVector.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inputIsNull[i]) { + final byte[] valueBytes = vector[i]; + final int valueStart = start[i]; + final int valueLength = length[i]; + outputVector[i] = + (StringExpr.compare(valueBytes, valueStart, valueLength, left, 0, left.length) < 0 || + StringExpr.compare(right, 0, right.length, valueBytes, valueStart, valueLength) < 0) ? 0 : 1; + } + } + } + } + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. + return null; + } + + @Override + public String vectorExpressionParameters() { + return + getColumnParamString(0, inputCol) + + ", left " + displayUtf8Bytes(left) + ", right " + displayUtf8Bytes(right); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt new file mode 100644 index 0000000..db42577 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnBetween.txt @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.HashSet; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * Output a boolean value indicating if a column is BETWEEN two constants. + */ +public class extends VectorExpression { + private static final long serialVersionUID = 1L; + + private final int inputCol; + + private final Timestamp leftValue; + private final Timestamp rightValue; + + public () { + super(); + + // Dummy final assignments. + inputCol = -1; + leftValue = null; + rightValue = null; + } + + /** + * After construction you must call setInListValues() to add the values to the IN set. + */ + public (int colNum, Timestamp leftValue, Timestamp rightValue, int outputColumnNum) { + super(outputColumnNum); + this.inputCol = colNum; + this.leftValue = leftValue; + this.rightValue = rightValue; + } + + @Override + public void evaluate(VectorizedRowBatch batch) throws HiveException { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + outputVector[0] = + (inputColVector.compareTo(0, leftValue) < 0 || + inputColVector.compareTo(0, rightValue) > 0) ? 0 : 1; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { + + // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop. + + if (!outputColVector.noNulls) { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + outputVector[i] = + (inputColVector.compareTo(i, leftValue) < 0 || + inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1; + } + } else { + for(int j = 0; j != n; j++) { + final int i = sel[j]; + outputVector[i] = + (inputColVector.compareTo(i, leftValue) < 0 || + inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1; + } + } + } else { + if (!outputColVector.noNulls) { + + // Assume it is almost always a performance win to fill all of isNull so we can + // safely reset noNulls. + Arrays.fill(outputIsNull, false); + outputColVector.noNulls = true; + } + for(int i = 0; i != n; i++) { + outputVector[i] = + (inputColVector.compareTo(i, leftValue) < 0 || + inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1; + } + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputVector[i] = + (inputColVector.compareTo(i, leftValue) < 0 || + inputColVector.compareTo(i, rightValue) > 0) ? 
0 : 1; + } + } + } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inputIsNull[i]) { + outputVector[i] = + (inputColVector.compareTo(i, leftValue) < 0 || + inputColVector.compareTo(i, rightValue) > 0) ? 0 : 1; + } + } + } + } + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. + return null; + } + + @Override + public String vectorExpressionParameters() { + return + getColumnParamString(0, inputCol) + + ", left " + leftValue.toString() + ", right " + rightValue.toString(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt new file mode 100644 index 0000000..4ab8440 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TruncStringColumnBetween.txt @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +public class extends { + private static final long serialVersionUID = 1L; + + public () { + super(); + } + + public (int colNum, byte[] left, byte[] right, int outputColumnNum) { + super(colNum, left, right, outputColumnNum); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index e541217..065e4ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1626,6 +1626,20 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass); + return createDecimal64VectorExpression( + vectorClass, childExprs, childrenMode, + isDecimal64ScaleEstablished, decimal64ColumnScale, + returnTypeInfo, returnDataTypePhysicalVariation); + } + + private VectorExpression createDecimal64VectorExpression(Class vectorClass, + List childExprs, VectorExpressionDescriptor.Mode childrenMode, + boolean isDecimal64ScaleEstablished, int decimal64ColumnScale, + TypeInfo returnTypeInfo, DataTypePhysicalVariation returnDataTypePhysicalVariation) + throws HiveException { + + final int numChildren = childExprs.size(); + /* * Custom build arguments. 
*/ @@ -1659,8 +1673,7 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd children.add(filterExpr); } arguments[i] = colIndex; - } else { - Preconditions.checkState(childExpr instanceof ExprNodeConstantDesc); + } else if (childExpr instanceof ExprNodeConstantDesc) { ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childExpr; if (typeInfo instanceof DecimalTypeInfo) { if (!isDecimal64ScaleEstablished) { @@ -1681,6 +1694,8 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd (scalarValue == null) ? getConstantVectorExpression(null, typeInfo, childrenMode) : scalarValue; } + } else { + return null; } } @@ -2049,8 +2064,8 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, //First handle special cases. If one of the special case methods cannot handle it, // it returns null. VectorExpression ve = null; - if (udf instanceof GenericUDFBetween && mode == VectorExpressionDescriptor.Mode.FILTER) { - ve = getBetweenFilterExpression(childExpr, mode, returnType); + if (udf instanceof GenericUDFBetween) { + ve = getBetweenExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { ve = getInExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIf) { @@ -2444,14 +2459,42 @@ private VectorExpression getInExpression(List childExpr, expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); ((IDoubleInExpr) expr).setInListValues(inValsD); } else if (isDecimalFamily(colType)) { - cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? 
FilterDecimalColumnInList.class : DecimalColumnInList.class); - HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()]; - for (int i = 0; i != inValsD.length; i++) { - inValsD[i] = (HiveDecimal) getVectorTypeScalarValue( - (ExprNodeConstantDesc) childrenForInList.get(i)); + + final boolean tryDecimal64 = + checkExprNodeDescForDecimal64(colExpr); + if (tryDecimal64) { + cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? + FilterDecimal64ColumnInList.class : Decimal64ColumnInList.class); + final int scale = ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale(); + expr = createDecimal64VectorExpression( + cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, + /* isDecimal64ScaleEstablished */ true, + /* decimal64ColumnScale */ scale, + returnType, DataTypePhysicalVariation.NONE); + if (expr != null) { + long[] inVals = new long[childrenForInList.size()]; + for (int i = 0; i != inVals.length; i++) { + ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childrenForInList.get(i); + HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue(); + final long decimal64Scalar = + new HiveDecimalWritable(hiveDecimal).serialize64(scale); + inVals[i] = decimal64Scalar; + } + ((ILongInExpr) expr).setInListValues(inVals); + } + } + if (expr == null) { + cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? 
+ FilterDecimalColumnInList.class : DecimalColumnInList.class); + expr = createVectorExpression( + cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); + HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()]; + for (int i = 0; i != inValsD.length; i++) { + inValsD[i] = (HiveDecimal) getVectorTypeScalarValue( + (ExprNodeConstantDesc) childrenForInList.get(i)); + } + ((IDecimalInExpr) expr).setInListValues(inValsD); } - expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType); - ((IDecimalInExpr) expr).setInListValues(inValsD); } else if (isDateFamily(colType)) { cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class); long[] inVals = new long[childrenForInList.size()]; @@ -2973,21 +3016,32 @@ private VectorExpression getCastToLongExpression(List childExpr, P return null; } - /* Get a [NOT] BETWEEN filter expression. This is treated as a special case + private VectorExpression tryDecimal64Between(VectorExpressionDescriptor.Mode mode, boolean isNot, + ExprNodeDesc colExpr, List childrenAfterNot, TypeInfo returnTypeInfo) + throws HiveException { + final Class cl; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = (isNot ? Decimal64ColumnNotBetween.class : Decimal64ColumnBetween.class); + } else { + cl = (isNot ? FilterDecimal64ColumnNotBetween.class : FilterDecimal64ColumnBetween.class); + } + return + createDecimal64VectorExpression( + cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, + /* isDecimal64ScaleEstablished */ true, + /* decimal64ColumnScale */ ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale(), + returnTypeInfo, DataTypePhysicalVariation.NONE); + } + + /* Get a [NOT] BETWEEN filter or projection expression. 
This is treated as a special case * because the NOT is actually specified in the expression tree as the first argument, * and we don't want any runtime cost for that. So creating the VectorExpression * needs to be done differently than the standard way where all arguments are * passed to the VectorExpression constructor. */ - private VectorExpression getBetweenFilterExpression(List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) - throws HiveException { - - if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { - - // Projection mode is not yet supported for [NOT] BETWEEN. Return null so Vectorizer - // knows to revert to row-at-a-time execution. - return null; - } + private VectorExpression getBetweenExpression(List childExpr, + VectorExpressionDescriptor.Mode mode, TypeInfo returnType) + throws HiveException { boolean hasDynamicValues = false; @@ -2995,6 +3049,11 @@ private VectorExpression getBetweenFilterExpression(List childExpr if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) && (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) { hasDynamicValues = true; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + + // Projection mode is not applicable. 
+ return null; + } } else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) || !(childExpr.get(3) instanceof ExprNodeConstantDesc)) { return null; @@ -3021,7 +3080,7 @@ private VectorExpression getBetweenFilterExpression(List childExpr } List castChildren = new ArrayList(); - + boolean wereCastUdfs = false; for (ExprNodeDesc desc: childExpr.subList(1, 4)) { if (commonType.equals(desc.getTypeInfo())) { castChildren.add(desc); @@ -3030,6 +3089,7 @@ private VectorExpression getBetweenFilterExpression(List childExpr ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf, Arrays.asList(new ExprNodeDesc[] { desc })); castChildren.add(engfd); + wereCastUdfs = true; } } String colType = commonType.getTypeName(); @@ -3040,55 +3100,141 @@ private VectorExpression getBetweenFilterExpression(List childExpr // determine class Class cl = null; if (isIntFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterLongColumnBetweenDynamicValue.class : - FilterLongColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = LongColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterLongColumnBetweenDynamicValue.class : + FilterLongColumnBetween.class); + } } else if (isIntFamily(colType) && notKeywordPresent) { - cl = FilterLongColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = LongColumnNotBetween.class; + } else { + cl = FilterLongColumnNotBetween.class; + } } else if (isFloatFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterDoubleColumnBetweenDynamicValue.class : - FilterDoubleColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = DoubleColumnBetween.class; + } else { + cl = (hasDynamicValues ? 
+ FilterDoubleColumnBetweenDynamicValue.class : + FilterDoubleColumnBetween.class); + } } else if (isFloatFamily(colType) && notKeywordPresent) { - cl = FilterDoubleColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = DoubleColumnNotBetween.class; + } else { + cl = FilterDoubleColumnNotBetween.class; + } } else if (colType.equals("string") && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterStringColumnBetweenDynamicValue.class : - FilterStringColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = StringColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterStringColumnBetweenDynamicValue.class : + FilterStringColumnBetween.class); + } } else if (colType.equals("string") && notKeywordPresent) { - cl = FilterStringColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = StringColumnNotBetween.class; + } else { + cl = FilterStringColumnNotBetween.class; + } } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterVarCharColumnBetweenDynamicValue.class : - FilterVarCharColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = VarCharColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterVarCharColumnBetweenDynamicValue.class : + FilterVarCharColumnBetween.class); + } } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) { - cl = FilterVarCharColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = VarCharColumnNotBetween.class; + } else { + cl = FilterVarCharColumnNotBetween.class; + } } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) { - cl = (hasDynamicValues ? 
- FilterCharColumnBetweenDynamicValue.class : - FilterCharColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = CharColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterCharColumnBetweenDynamicValue.class : + FilterCharColumnBetween.class); + } } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) { - cl = FilterCharColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = CharColumnNotBetween.class; + } else { + cl = FilterCharColumnNotBetween.class; + } } else if (colType.equals("timestamp") && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterTimestampColumnBetweenDynamicValue.class : - FilterTimestampColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = TimestampColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterTimestampColumnBetweenDynamicValue.class : + FilterTimestampColumnBetween.class); + } } else if (colType.equals("timestamp") && notKeywordPresent) { - cl = FilterTimestampColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = TimestampColumnNotBetween.class; + } else { + cl = FilterTimestampColumnNotBetween.class; + } } else if (isDecimalFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterDecimalColumnBetweenDynamicValue.class : - FilterDecimalColumnBetween.class); + final boolean tryDecimal64 = + checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues; + if (tryDecimal64) { + VectorExpression decimal64VecExpr = + tryDecimal64Between( + mode, /* isNot */ false, colExpr, childrenAfterNot, + returnType); + if (decimal64VecExpr != null) { + return decimal64VecExpr; + } + } + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = DecimalColumnBetween.class; + } else { + cl = (hasDynamicValues ? 
+ FilterDecimalColumnBetweenDynamicValue.class : + FilterDecimalColumnBetween.class); + } } else if (isDecimalFamily(colType) && notKeywordPresent) { - cl = FilterDecimalColumnNotBetween.class; + final boolean tryDecimal64 = + checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues; + if (tryDecimal64) { + VectorExpression decimal64VecExpr = + tryDecimal64Between( + mode, /* isNot */ true, colExpr, childrenAfterNot, returnType); + if (decimal64VecExpr != null) { + return decimal64VecExpr; + } + } + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = DecimalColumnNotBetween.class; + } else { + cl = FilterDecimalColumnNotBetween.class; + } } else if (isDateFamily(colType) && !notKeywordPresent) { - cl = (hasDynamicValues ? - FilterDateColumnBetweenDynamicValue.class : - FilterLongColumnBetween.class); + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = LongColumnBetween.class; + } else { + cl = (hasDynamicValues ? + FilterDateColumnBetweenDynamicValue.class : + FilterLongColumnBetween.class); + } } else if (isDateFamily(colType) && notKeywordPresent) { - cl = FilterLongColumnNotBetween.class; + if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { + cl = LongColumnNotBetween.class; + } else { + cl = FilterLongColumnNotBetween.class; + } } - return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression( + cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } private boolean isCondExpr(ExprNodeDesc exprNodeDesc) { @@ -3523,7 +3669,11 @@ private Object getScalarValue(ExprNodeConstantDesc constDesc) private long getIntFamilyScalarAsLong(ExprNodeConstantDesc constDesc) throws HiveException { Object o = getScalarValue(constDesc); - if (o instanceof Integer) { + if (o instanceof Byte) { + return (Byte) o; + } if (o instanceof Short) { + return (Short) o; + } else if (o instanceof Integer) { return (Integer) o; } else 
if (o instanceof Long) { return (Long) o; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java new file mode 100644 index 0000000..5632cfb --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/Decimal64ColumnInList.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +/** + * Output a boolean value indicating if a column is IN a list of constants. 
+ */ +public class Decimal64ColumnInList extends LongColumnInList { + + private static final long serialVersionUID = 1L; + + public Decimal64ColumnInList(int colNum, int outputColumnNum) { + super(colNum, outputColumnNum); + } + + public Decimal64ColumnInList() { + super(); + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) inputTypeInfos[0]; + final int scale = decimalTypeInfo.scale(); + HiveDecimalWritable writable = new HiveDecimalWritable(); + StringBuilder sb = new StringBuilder(); + sb.append(getColumnParamString(0, colNum)); + sb.append(", values ["); + for (long value : inListValues) { + writable.deserialize64(value, scale); + sb.append(", decimal64Val "); + sb.append(value); + sb.append(", decimalVal "); + sb.append(writable.toString()); + } + sb.append("]"); + return sb.toString(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + + // return null since this will be handled as a special case in VectorizationContext + return null; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java new file mode 100644 index 0000000..c26a93a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnBetween.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +public class FilterDecimal64ColumnBetween extends FilterLongColumnBetween { + + private static final long serialVersionUID = 1L; + + public FilterDecimal64ColumnBetween(int colNum, long leftValue, long rightValue) { + super(colNum, leftValue, rightValue); + } + + public FilterDecimal64ColumnBetween() { + super(); + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) inputTypeInfos[1]; + HiveDecimalWritable writable1 = new HiveDecimalWritable(); + writable1.deserialize64(leftValue, decimalTypeInfo1.scale()); + + DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) inputTypeInfos[2]; + HiveDecimalWritable writable2 = new HiveDecimalWritable(); + writable2.deserialize64(rightValue, decimalTypeInfo2.scale()); + return + getColumnParamString(0, colNum) + + ", decimal64LeftVal " + leftValue + ", decimalLeftVal " + writable1.toString() + + ", decimal64RightVal " + rightValue + ", decimalRightVal " + writable2.toString(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(3) + 
.setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL_64, + VectorExpressionDescriptor.ArgumentType.DECIMAL_64, + VectorExpressionDescriptor.ArgumentType.DECIMAL_64) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnInList.java new file mode 100644 index 0000000..a75cdbf --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnInList.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +/** + * Evaluate IN filter on a batch for a vector of longs. 
+ */ +public class FilterDecimal64ColumnInList extends FilterLongColumnInList { + + private static final long serialVersionUID = 1L; + + public FilterDecimal64ColumnInList() { + super(); + } + + /** + * After construction you must call setInListValues() to add the values to the IN set. + */ + public FilterDecimal64ColumnInList(int colNum) { + super(colNum); + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. + return null; + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) inputTypeInfos[0]; + final int scale = decimalTypeInfo.scale(); + HiveDecimalWritable writable = new HiveDecimalWritable(); + StringBuilder sb = new StringBuilder(); + sb.append(getColumnParamString(0, inputCol)); + sb.append(", values ["); + for (long value : inListValues) { + writable.deserialize64(value, scale); + sb.append(", decimal64Val "); + sb.append(value); + sb.append(", decimalVal "); + sb.append(writable.toString()); + } + sb.append("]"); + return sb.toString(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnNotBetween.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnNotBetween.java new file mode 100644 index 0000000..13d5c1a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimal64ColumnNotBetween.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; + +public class FilterDecimal64ColumnNotBetween extends FilterLongColumnNotBetween { + + private static final long serialVersionUID = 1L; + + public FilterDecimal64ColumnNotBetween(int colNum, long leftValue, long rightValue) { + super(colNum, leftValue, rightValue); + } + + public FilterDecimal64ColumnNotBetween() { + super(); + } + + @Override + public String vectorExpressionParameters() { + DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) inputTypeInfos[1]; + HiveDecimalWritable writable1 = new HiveDecimalWritable(); + writable1.deserialize64(leftValue, decimalTypeInfo1.scale()); + + DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) inputTypeInfos[2]; + HiveDecimalWritable writable2 = new HiveDecimalWritable(); + writable2.deserialize64(rightValue, decimalTypeInfo2.scale()); + return + getColumnParamString(0, colNum) + + ", decimal64LeftVal " + leftValue + ", decimalLeftVal " + writable1.toString() + + ", decimal64RightVal " + rightValue + ", decimalRightVal " + writable2.toString(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(3) 
+ .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL_64, + VectorExpressionDescriptor.ArgumentType.DECIMAL_64, + VectorExpressionDescriptor.ArgumentType.DECIMAL_64) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java index 312a388..7306bbf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java @@ -36,8 +36,9 @@ public class FilterLongColumnInList extends VectorExpression implements ILongInExpr { private static final long serialVersionUID = 1L; - private final int inputCol; - private long[] inListValues; + + protected final int inputCol; + protected long[] inListValues; // Transient members initialized by transientInit method. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java index 8469882..d519141 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java @@ -33,8 +33,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long[] inListValues; + protected int colNum; + protected long[] inListValues; // The set object containing the IN list. This is optimized for lookup // of the data type of the column. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java index 55c2586..9328eb4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java @@ -170,6 +170,11 @@ public void setInListValues(byte [][] a) { @Override public String vectorExpressionParameters() { - return getColumnParamString(0, inputCol) + ", values " + Arrays.toString(inListValues); + StringBuilder sb = new StringBuilder(); + sb.append("col "); + sb.append(inputCol); + sb.append(", values "); + sb.append(displayArrayOfUtf8ByteArrays(inListValues)); + return sb.toString(); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index b6ae7d2..bc1b834 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -159,20 +159,24 @@ public boolean getAddPadding() { OMIT_GENERATION, STRING_FAMILY, STRING_FAMILY_OTHER_TYPE_VALUE, - TIMESTAMP_MILLISECONDS + TIMESTAMP_MILLISECONDS, + VALUE_LIST } private final GenerationKind generationKind; private final TypeInfo typeInfo; private final TypeInfo sourceTypeInfo; private final StringGenerationOption stringGenerationOption; + private final List valueList; private GenerationSpec(GenerationKind generationKind, TypeInfo typeInfo, - TypeInfo sourceTypeInfo, StringGenerationOption stringGenerationOption) { + TypeInfo sourceTypeInfo, StringGenerationOption stringGenerationOption, + List valueList) { this.generationKind = generationKind; this.typeInfo = typeInfo; this.sourceTypeInfo = sourceTypeInfo; this.stringGenerationOption = stringGenerationOption; + this.valueList = valueList; } public 
GenerationKind getGenerationKind() { @@ -191,31 +195,40 @@ public StringGenerationOption getStringGenerationOption() { return stringGenerationOption; } + public List getValueList() { + return valueList; + } + public static GenerationSpec createSameType(TypeInfo typeInfo) { return new GenerationSpec( - GenerationKind.SAME_TYPE, typeInfo, null, null); + GenerationKind.SAME_TYPE, typeInfo, null, null, null); } public static GenerationSpec createOmitGeneration(TypeInfo typeInfo) { return new GenerationSpec( - GenerationKind.OMIT_GENERATION, typeInfo, null, null); + GenerationKind.OMIT_GENERATION, typeInfo, null, null, null); } public static GenerationSpec createStringFamily(TypeInfo typeInfo, StringGenerationOption stringGenerationOption) { return new GenerationSpec( - GenerationKind.STRING_FAMILY, typeInfo, null, stringGenerationOption); + GenerationKind.STRING_FAMILY, typeInfo, null, stringGenerationOption, null); } public static GenerationSpec createStringFamilyOtherTypeValue(TypeInfo typeInfo, TypeInfo otherTypeTypeInfo) { return new GenerationSpec( - GenerationKind.STRING_FAMILY_OTHER_TYPE_VALUE, typeInfo, otherTypeTypeInfo, null); + GenerationKind.STRING_FAMILY_OTHER_TYPE_VALUE, typeInfo, otherTypeTypeInfo, null, null); } public static GenerationSpec createTimestampMilliseconds(TypeInfo typeInfo) { return new GenerationSpec( - GenerationKind.TIMESTAMP_MILLISECONDS, typeInfo, null, null); + GenerationKind.TIMESTAMP_MILLISECONDS, typeInfo, null, null, null); + } + + public static GenerationSpec createValueList(TypeInfo typeInfo, List valueList) { + return new GenerationSpec( + GenerationKind.VALUE_LIST, typeInfo, null, null, valueList); } } @@ -902,6 +915,13 @@ public static Object randomStringFamily(Random random, TypeInfo typeInfo, object = longWritable; } break; + case VALUE_LIST: + { + List valueList = generationSpec.getValueList(); + final int valueCount = valueList.size(); + object = valueList.get(r.nextInt(valueCount)); + } + break; default: throw new 
RuntimeException("Unexpected generationKind " + generationKind); } @@ -1307,24 +1327,42 @@ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, DataTypePhysicalVariation dataTypePhysicalVariation, boolean allowNull) { + return randomWritable(typeInfo, objectInspector, dataTypePhysicalVariation, allowNull); + } + + public static Object randomWritable(Random random, TypeInfo typeInfo, + ObjectInspector objectInspector) { + return randomWritable( + random, typeInfo, objectInspector, DataTypePhysicalVariation.NONE, true); + } + + public static Object randomWritable(Random random, TypeInfo typeInfo, + ObjectInspector objectInspector, boolean allowNull) { + return randomWritable( + random, typeInfo, objectInspector, DataTypePhysicalVariation.NONE, allowNull); + } + + public static Object randomWritable(Random random, TypeInfo typeInfo, + ObjectInspector objectInspector, DataTypePhysicalVariation dataTypePhysicalVariation, + boolean allowNull) { switch (typeInfo.getCategory()) { case PRIMITIVE: { - if (allowNull && r.nextInt(20) == 0) { + if (allowNull && random.nextInt(20) == 0) { return null; } - final Object object = randomPrimitiveObject(r, (PrimitiveTypeInfo) typeInfo); + final Object object = randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo); return getWritablePrimitiveObject( (PrimitiveTypeInfo) typeInfo, objectInspector, dataTypePhysicalVariation, object); } case LIST: { - if (allowNull && r.nextInt(20) == 0) { + if (allowNull && random.nextInt(20) == 0) { return null; } // Always generate a list with at least 1 value? 
- final int elementCount = 1 + r.nextInt(100); + final int elementCount = 1 + random.nextInt(100); final StandardListObjectInspector listObjectInspector = (StandardListObjectInspector) objectInspector; final ObjectInspector elementObjectInspector = @@ -1345,7 +1383,8 @@ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, } final Object listObj = listObjectInspector.create(elementCount); for (int i = 0; i < elementCount; i++) { - final Object ele = randomWritable(elementTypeInfo, elementObjectInspector, allowNull); + final Object ele = randomWritable( + random, elementTypeInfo, elementObjectInspector, allowNull); // UNDONE: For now, a 1-element list with a null element is a null list... if (ele == null && elementCount == 1) { return null; @@ -1382,10 +1421,10 @@ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, } case MAP: { - if (allowNull && r.nextInt(20) == 0) { + if (allowNull && random.nextInt(20) == 0) { return null; } - final int keyPairCount = r.nextInt(100); + final int keyPairCount = random.nextInt(100); final StandardMapObjectInspector mapObjectInspector = (StandardMapObjectInspector) objectInspector; final ObjectInspector keyObjectInspector = @@ -1400,15 +1439,15 @@ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, valueObjectInspector); final Object mapObj = mapObjectInspector.create(); for (int i = 0; i < keyPairCount; i++) { - Object key = randomWritable(keyTypeInfo, keyObjectInspector); - Object value = randomWritable(valueTypeInfo, valueObjectInspector); + Object key = randomWritable(random, keyTypeInfo, keyObjectInspector); + Object value = randomWritable(random, valueTypeInfo, valueObjectInspector); mapObjectInspector.put(mapObj, key, value); } return mapObj; } case STRUCT: { - if (allowNull && r.nextInt(20) == 0) { + if (allowNull && random.nextInt(20) == 0) { return null; } final StandardStructObjectInspector structObjectInspector = @@ -1423,7 +1462,7 @@ 
public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, final TypeInfo fieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector( fieldObjectInspector); - final Object fieldObj = randomWritable(fieldTypeInfo, fieldObjectInspector); + final Object fieldObj = randomWritable(random, fieldTypeInfo, fieldObjectInspector); structObjectInspector.setStructFieldData(structObj, fieldRef, fieldObj); } return structObj; @@ -1434,13 +1473,13 @@ public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector, (StandardUnionObjectInspector) objectInspector; final List objectInspectorList = unionObjectInspector.getObjectInspectors(); final int unionCount = objectInspectorList.size(); - final byte tag = (byte) r.nextInt(unionCount); + final byte tag = (byte) random.nextInt(unionCount); final ObjectInspector fieldObjectInspector = objectInspectorList.get(tag); final TypeInfo fieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector( fieldObjectInspector); - final Object fieldObj = randomWritable(fieldTypeInfo, fieldObjectInspector, false); + final Object fieldObj = randomWritable(random, fieldTypeInfo, fieldObjectInspector, false); if (fieldObj == null) { throw new RuntimeException(); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java new file mode 100644 index 0000000..99a074c --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorBetweenIn.java @@ -0,0 +1,779 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.TestVectorArithmetic.ColumnScalarMode; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import 
org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.io.WritableComparable; + +import junit.framework.Assert; + +import org.junit.Ignore; +import org.junit.Test; + +public class TestVectorBetweenIn { + + @Test + public void testTinyInt() 
throws Exception { + Random random = new Random(5371); + + doBetweenIn(random, "tinyint"); + } + + @Test + public void testSmallInt() throws Exception { + Random random = new Random(2772); + + doBetweenIn(random, "smallint"); + } + + @Test + public void testInt() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "int"); + } + + @Test + public void testBigInt() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "bigint"); + } + + @Test + public void testString() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "string"); + } + + @Test + public void testTimestamp() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "timestamp"); + } + + @Test + public void testDate() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "date"); + } + + @Test + public void testFloat() throws Exception { + Random random = new Random(7322); + + doBetweenIn(random, "float"); + } + + @Test + public void testDouble() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "double"); + } + + @Test + public void testChar() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "char(10)"); + } + + @Test + public void testVarchar() throws Exception { + Random random = new Random(12882); + + doBetweenIn(random, "varchar(15)"); + } + + @Test + public void testDecimal() throws Exception { + Random random = new Random(9300); + + doDecimalTests(random, /* tryDecimal64 */ false); + } + + @Test + public void testDecimal64() throws Exception { + Random random = new Random(9300); + + doDecimalTests(random, /* tryDecimal64 */ true); + } + + public enum BetweenInTestMode { + ROW_MODE, + ADAPTOR, + VECTOR_EXPRESSION; + + static final int count = values().length; + } + + public enum BetweenInVariation { + FILTER_BETWEEN, + FILTER_NOT_BETWEEN, + PROJECTION_BETWEEN, + PROJECTION_NOT_BETWEEN, + FILTER_IN, + 
PROJECTION_IN; + + static final int count = values().length; + + final boolean isFilter; + BetweenInVariation() { + isFilter = name().startsWith("FILTER"); + } + } + + private static TypeInfo[] decimalTypeInfos = new TypeInfo[] { + new DecimalTypeInfo(38, 18), + new DecimalTypeInfo(25, 2), + new DecimalTypeInfo(19, 4), + new DecimalTypeInfo(18, 10), + new DecimalTypeInfo(17, 3), + new DecimalTypeInfo(12, 2), + new DecimalTypeInfo(7, 1) + }; + + private void doDecimalTests(Random random, boolean tryDecimal64) + throws Exception { + for (TypeInfo typeInfo : decimalTypeInfos) { + doBetweenIn( + random, typeInfo.getTypeName(), tryDecimal64); + } + } + + private void doBetweenIn(Random random, String typeName) + throws Exception { + doBetweenIn(random, typeName, /* tryDecimal64 */ false); + } + + private void doBetweenIn(Random random, String typeName, boolean tryDecimal64) + throws Exception { + + int subVariation; + for (BetweenInVariation betweenInVariation : BetweenInVariation.values()) { + subVariation = 0; + while (true) { + if (!doBetweenInVariation( + random, typeName, tryDecimal64, betweenInVariation, subVariation)) { + break; + } + subVariation++; + } + } + } + + private boolean checkDecimal64(boolean tryDecimal64, TypeInfo typeInfo) { + if (!tryDecimal64 || !(typeInfo instanceof DecimalTypeInfo)) { + return false; + } + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo; + boolean result = HiveDecimalWritable.isPrecisionDecimal64(decimalTypeInfo.getPrecision()); + return result; + } + + private void removeValue(List valueList, Object value) { + valueList.remove(value); + } + + private boolean needsValidDataTypeData(TypeInfo typeInfo) { + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + if (primitiveCategory == PrimitiveCategory.STRING || + primitiveCategory == PrimitiveCategory.CHAR || + primitiveCategory == PrimitiveCategory.VARCHAR || + primitiveCategory == PrimitiveCategory.BINARY) { + return false; + 
} + return true; + } + + private boolean doBetweenInVariation(Random random, String typeName, + boolean tryDecimal64, BetweenInVariation betweenInVariation, int subVariation) + throws Exception { + + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + + boolean isDecimal64 = checkDecimal64(tryDecimal64, typeInfo); + DataTypePhysicalVariation dataTypePhysicalVariation = + (isDecimal64 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE); + final int decimal64Scale = + (isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0); + + //---------------------------------------------------------------------------------------------- + + ObjectInspector objectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + typeInfo); + + final int valueCount = 10 + random.nextInt(10); + List valueList = new ArrayList(valueCount); + for (int i = 0; i < valueCount; i++) { + valueList.add( + VectorRandomRowSource.randomWritable( + random, typeInfo, objectInspector, dataTypePhysicalVariation, /* allowNull */ false)); + } + List sortedList = new ArrayList(valueCount); + sortedList.addAll(valueList); + + Object object = valueList.get(0); + WritableComparator writableComparator = + WritableComparator.get((Class) object.getClass()); + sortedList.sort(writableComparator); + + final boolean isBetween = + (betweenInVariation == BetweenInVariation.FILTER_BETWEEN || + betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN || + betweenInVariation == BetweenInVariation.PROJECTION_BETWEEN || + betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN); + + List compareList = new ArrayList(); + final boolean isInvert; + if (isBetween) { + + // FILTER_BETWEEN + // FILTER_NOT_BETWEEN + // PROJECTION_BETWEEN + // PROJECTION_NOT_BETWEEN + isInvert = + (betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN || + betweenInVariation 
== BetweenInVariation.PROJECTION_NOT_BETWEEN); + switch (subVariation) { + case 0: + // Range covers all values exactly. + compareList.add(sortedList.get(0)); + compareList.add(sortedList.get(valueCount - 1)); + break; + case 1: + // Exclude the first and last sorted. + compareList.add(sortedList.get(1)); + compareList.add(sortedList.get(valueCount - 2)); + break; + case 2: + // Only last 2 sorted. + compareList.add(sortedList.get(valueCount - 2)); + compareList.add(sortedList.get(valueCount - 1)); + break; + case 3: + case 4: + case 5: + case 6: + { + // Choose 2 adjacent in the middle. + Object min = sortedList.get(5); + Object max = sortedList.get(6); + compareList.add(min); + compareList.add(max); + if (subVariation == 4) { + removeValue(valueList, min); + } else if (subVariation == 5) { + removeValue(valueList, max); + } else if (subVariation == 6) { + removeValue(valueList, min); + removeValue(valueList, max); + } + } + break; + default: + return false; + } + } else { + + // FILTER_IN. + // PROJECTION_IN. + isInvert = false; + switch (subVariation) { + case 0: + // All values. + compareList.addAll(valueList); + break; + case 1: + // Don't include the first and last sorted. + for (int i = 1; i < valueCount - 1; i++) { + compareList.add(valueList.get(i)); + } + break; + case 2: + // The even ones. + for (int i = 2; i < valueCount; i += 2) { + compareList.add(valueList.get(i)); + } + break; + case 3: + { + // Choose 2 adjacent in the middle. 
+ Object min = sortedList.get(5); + Object max = sortedList.get(6); + compareList.add(min); + compareList.add(max); + if (subVariation == 4) { + removeValue(valueList, min); + } else if (subVariation == 5) { + removeValue(valueList, max); + } else if (subVariation == 6) { + removeValue(valueList, min); + removeValue(valueList, max); + } + } + break; + default: + return false; + } + } + + //---------------------------------------------------------------------------------------------- + + GenerationSpec generationSpec = GenerationSpec.createValueList(typeInfo, valueList); + + List generationSpecList = new ArrayList(); + List explicitDataTypePhysicalVariationList = + new ArrayList(); + generationSpecList.add(generationSpec); + explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation); + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + explicitDataTypePhysicalVariationList); + + List columns = new ArrayList(); + columns.add("col0"); + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col0", "table", false); + + List children = new ArrayList(); + if (isBetween) { + children.add(new ExprNodeConstantDesc(new Boolean(isInvert))); + } + children.add(col1Expr); + for (Object compareObject : compareList) { + ExprNodeConstantDesc constDesc = + new ExprNodeConstantDesc( + typeInfo, + VectorRandomRowSource.getNonWritablePrimitiveObject( + compareObject, typeInfo, objectInspector)); + children.add(constDesc); + } + + String[] columnNames = columns.toArray(new String[0]); + + Object[][] randomRows = rowSource.randomRows(100000); + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + rowSource, + randomRows, + null); + + final GenericUDF udf; + final ObjectInspector outputObjectInspector; + if (isBetween) { + + udf = new GenericUDFBetween(); + + // First argument is boolean 
invert. Arguments 1..3 are inspectors for range limits... + ObjectInspector[] argumentOIs = new ObjectInspector[4]; + argumentOIs[0] = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + argumentOIs[1] = objectInspector; + argumentOIs[2] = objectInspector; + argumentOIs[3] = objectInspector; + outputObjectInspector = udf.initialize(argumentOIs); + } else { + final int compareCount = compareList.size(); + udf = new GenericUDFIn(); + ObjectInspector[] argumentOIs = new ObjectInspector[compareCount]; + // argumentOIs[0] = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ConstantObjectInspector constantObjectInspector = + (ConstantObjectInspector) children.get(2).getWritableObjectInspector(); + for (int i = 0; i < compareCount; i++) { + argumentOIs[i] = constantObjectInspector; + } + outputObjectInspector = udf.initialize(argumentOIs); + } + + TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, udf, children); + + final int rowCount = randomRows.length; + Object[][] resultObjectsArray = new Object[BetweenInTestMode.count][]; + for (int i = 0; i < BetweenInTestMode.count; i++) { + + Object[] resultObjects = new Object[rowCount]; + resultObjectsArray[i] = resultObjects; + + BetweenInTestMode betweenInTestMode = BetweenInTestMode.values()[i]; + switch (betweenInTestMode) { + case ROW_MODE: + if (!doRowCastTest( + typeInfo, + betweenInVariation, + compareList, + columns, + children, + udf, exprDesc, + randomRows, + rowSource.rowStructObjectInspector(), + resultObjects)) { + return false; + } + break; + case ADAPTOR: + case VECTOR_EXPRESSION: + if (!doVectorCastTest( + typeInfo, + betweenInVariation, + compareList, + columns, + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + children, + udf, exprDesc, + betweenInTestMode, + batchSource, + 
exprDesc.getWritableObjectInspector(), + outputTypeInfo, + resultObjects)) { + return false; + } + break; + default: + throw new RuntimeException("Unexpected IF statement test mode " + betweenInTestMode); + } + } + + for (int i = 0; i < rowCount; i++) { + // Row-mode is the expected value. + Object expectedResult = resultObjectsArray[0][i]; + + for (int v = 1; v < BetweenInTestMode.count; v++) { + Object vectorResult = resultObjectsArray[v][i]; + BetweenInTestMode betweenInTestMode = BetweenInTestMode.values()[v]; + if (betweenInVariation.isFilter && + expectedResult == null && + vectorResult != null) { + // This is OK. + boolean vectorBoolean = ((BooleanWritable) vectorResult).get(); + if (vectorBoolean) { + Assert.fail( + "Row " + i + + " typeName " + typeName + + " outputTypeName " + outputTypeInfo.getTypeName() + + " " + betweenInVariation + + " " + betweenInTestMode + + " result is NOT NULL and true" + + " does not match row-mode expected result is NULL which means false here" + + " row values " + Arrays.toString(randomRows[i]) + + " exprDesc " + exprDesc.toString()); + } + } else if (expectedResult == null || vectorResult == null) { + if (expectedResult != null || vectorResult != null) { + Assert.fail( + "Row " + i + + " sourceTypeName " + typeName + + " " + betweenInVariation + + " " + betweenInTestMode + + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + + " does not match row-mode expected result is NULL " + + (expectedResult == null ? 
"YES" : "NO result " + expectedResult.toString()) + + " row values " + Arrays.toString(randomRows[i]) + + " exprDesc " + exprDesc.toString()); + } + } else { + + if (!expectedResult.equals(vectorResult)) { + Assert.fail( + "Row " + i + + " sourceTypeName " + typeName + + " " + betweenInVariation + + " " + betweenInTestMode + + " result " + vectorResult.toString() + + " (" + vectorResult.getClass().getSimpleName() + ")" + + " does not match row-mode expected result " + expectedResult.toString() + + " (" + expectedResult.getClass().getSimpleName() + ")" + + " row values " + Arrays.toString(randomRows[i]) + + " exprDesc " + exprDesc.toString()); + } + } + } + } + return true; + } + + private boolean doRowCastTest(TypeInfo typeInfo, + BetweenInVariation betweenInVariation, List compareList, + List columns, List children, + GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, + Object[][] randomRows, + ObjectInspector rowInspector, Object[] resultObjects) + throws Exception { + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " targetTypeInfo " + targetTypeInfo + + " betweenInTestMode ROW_MODE" + + " exprDesc " + exprDesc.toString()); + */ + + HiveConf hiveConf = new HiveConf(); + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); + try { + evaluator.initialize(rowInspector); + } catch (HiveException e) { + return false; + } + + final int rowCount = randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = randomRows[i]; + Object result = evaluator.evaluate(row); + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + result, PrimitiveObjectInspectorFactory.writableBooleanObjectInspector, + ObjectInspectorCopyOption.WRITABLE); + resultObjects[i] = copyResult; + } + + return true; + } + + private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, + VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, + ObjectInspector objectInspector, Object[] resultObjects) { + + 
boolean selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); + + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[rowIndex++] = copyResult; + } + } + + private boolean doVectorCastTest(TypeInfo typeInfo, + BetweenInVariation betweenInVariation, List compareList, + List columns, String[] columnNames, + TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, + List children, + GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, + BetweenInTestMode betweenInTestMode, + VectorRandomBatchSource batchSource, + ObjectInspector objectInspector, + TypeInfo outputTypeInfo, Object[] resultObjects) + throws Exception { + + HiveConf hiveConf = new HiveConf(); + if (betweenInTestMode == BetweenInTestMode.ADAPTOR) { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); + } + + final boolean isFilter = betweenInVariation.isFilter; + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = + vectorizationContext.getVectorExpression(exprDesc, + (isFilter ? 
+ VectorExpressionDescriptor.Mode.FILTER : + VectorExpressionDescriptor.Mode.PROJECTION)); + vectorExpression.transientInit(); + + if (betweenInTestMode == BetweenInTestMode.VECTOR_EXPRESSION && + vectorExpression instanceof VectorUDFAdaptor) { + System.out.println( + "*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + + " betweenInTestMode " + betweenInTestMode + + " betweenInVariation " + betweenInVariation + + " vectorExpression " + vectorExpression.toString()); + } + + // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName()); + + /* + System.out.println( + "*DEBUG* typeInfo " + typeInfo.toString() + + " betweenInTestMode " + betweenInTestMode + + " betweenInVariation " + betweenInVariation + + " vectorExpression " + vectorExpression.toString()); + */ + + VectorRandomRowSource rowSource = batchSource.getRowSource(); + VectorizedRowBatchCtx batchContext = + new VectorizedRowBatchCtx( + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + vectorizationContext.getScratchColumnTypeNames(), + vectorizationContext.getScratchDataTypePhysicalVariations()); + + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorExtractRow resultVectorExtractRow = null; + Object[] scrqtchRow = null; + if (!isFilter) { + resultVectorExtractRow = new VectorExtractRow(); + final int outputColumnNum = vectorExpression.getOutputColumnNum(); + resultVectorExtractRow.init( + new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum }); + scrqtchRow = new Object[1]; + } + + boolean copySelectedInUse = false; + int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + + batchSource.resetBatchIteration(); + int rowIndex = 0; + while (true) { + if (!batchSource.fillNextBatch(batch)) { + break; + } + final int originalBatchSize = batch.size; + if (isFilter) { + 
copySelectedInUse = batch.selectedInUse; + if (batch.selectedInUse) { + System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize); + } + } + + // In filter mode, the batch size can be made smaller. + vectorExpression.evaluate(batch); + + if (!isFilter) { + extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, + objectInspector, resultObjects); + } else { + final int currentBatchSize = batch.size; + if (copySelectedInUse && batch.selectedInUse) { + int selectIndex = 0; + for (int i = 0; i < originalBatchSize; i++) { + final int originalBatchIndex = copySelected[i]; + final boolean booleanResult; + if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) { + booleanResult = true; + selectIndex++; + } else { + booleanResult = false; + } + resultObjects[rowIndex + i] = new BooleanWritable(booleanResult); + } + } else if (batch.selectedInUse) { + int selectIndex = 0; + for (int i = 0; i < originalBatchSize; i++) { + final boolean booleanResult; + if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) { + booleanResult = true; + selectIndex++; + } else { + booleanResult = false; + } + resultObjects[rowIndex + i] = new BooleanWritable(booleanResult); + } + } else if (currentBatchSize == 0) { + // Whole batch got zapped. + for (int i = 0; i < originalBatchSize; i++) { + resultObjects[rowIndex + i] = new BooleanWritable(false); + } + } else { + // Every row kept. 
+ for (int i = 0; i < originalBatchSize; i++) { + resultObjects[rowIndex + i] = new BooleanWritable(true); + } + } + } + + rowIndex += originalBatchSize; + } + + return true; + } +} diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 12ae103..801dda3 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -1390,7 +1390,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(cdate BETWEEN DATE'1969-12-30' AND DATE'1970-01-02') -> 5:boolean + selectExpressions: LongColumnBetween(col 3:date, left -2, right 1) -> 5:boolean Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1425,7 +1425,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -1528,7 +1528,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 5:boolean + selectExpressions: DecimalColumnNotBetween(col 1:decimal(20,10), left -2000, right 4390.1351351351) -> 5:boolean Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1563,7 +1563,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out 
ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index 966f6c5..d2b1fa4 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -408,7 +408,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 3, 10, 12, 13, 14, 11, 7, 16, 23, 2] - selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val 
Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 
19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date + selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, 
IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 
500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) @@ -693,7 +693,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 3, 15, 26, 36, 40, 42, 44, 46, 53, 2] - selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, 
IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, 
VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date + selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 
23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE 
(TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index 4edd0e4..9033b88 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -1310,7 +1310,7 @@ STAGE PLANS: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reduce Operator 
Tree: Group By Operator @@ -1332,7 +1332,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5, 7, 8, 11, 6, 12, 13, 14] - selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 0)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 4:double) -> 5:double, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 7:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 8:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D)(children: DoubleColDivideLongColumn(col 4:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 6:double) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: 
FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 14:double) -> 4:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double) -> 4:double) -> 14:double + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, 
decimalPlaces 0)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 4:double) -> 5:double, DoubleColumnBetween(col 6:double, left 8.97077295279421E19, right 8.97077295279422E19)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 7:boolean, DoubleColumnBetween(col 6:double, left 8.97077295279421E19, right 8.97077295279422E19)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 8:boolean, DoubleColumnBetween(col 6:double, left 9.20684592523616E19, right 9.20684592523617E19)(children: DoubleColDivideLongColumn(col 4:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 6:double) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double) -> 
6:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 14:double) -> 4:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double) -> 4:double) -> 14:double Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index 78bcd26..8b1a2be 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ 
-1369,7 +1369,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(cdate BETWEEN DATE'1969-12-30' AND DATE'1970-01-02') -> 5:boolean + selectExpressions: LongColumnBetween(col 3:date, left -2, right 1) -> 5:boolean Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1403,7 +1403,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized @@ -1505,7 +1505,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 5:boolean + selectExpressions: DecimalColumnNotBetween(col 1:decimal(20,10), left -2000, right 4390.1351351351) -> 5:boolean Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1539,7 +1539,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reducer 2 Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index f094fba..3944542 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -1296,7 +1296,7 @@ STAGE PLANS: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false 
vectorized: true Reduce Operator Tree: Group By Operator @@ -1318,7 +1318,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5, 7, 8, 11, 6, 12, 13, 14] - selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 0)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 4:double) -> 5:double, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 7:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 8:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D)(children: DoubleColDivideLongColumn(col 4:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 6:double) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, 
decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 14:double) -> 4:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double) -> 4:double) -> 14:double + selectExpressions: 
RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 0)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 4:double) -> 5:double, DoubleColumnBetween(col 6:double, left 8.97077295279421E19, right 8.97077295279422E19)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 7:boolean, DoubleColumnBetween(col 6:double, left 8.97077295279421E19, right 8.97077295279422E19)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 8:boolean, DoubleColumnBetween(col 6:double, left 9.20684592523616E19, right 9.20684592523617E19)(children: DoubleColDivideLongColumn(col 4:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 6:double) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 
4:double) -> 6:double) -> 4:double) -> 6:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 14:double) -> 4:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double) -> 4:double) -> 14:double Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/vector_case_when_2.q.out ql/src/test/results/clientpositive/vector_case_when_2.q.out index 9ff8750..342c518 100644 --- ql/src/test/results/clientpositive/vector_case_when_2.q.out +++ 
ql/src/test/results/clientpositive/vector_case_when_2.q.out @@ -376,7 +376,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 3, 10, 12, 13, 14, 11, 7, 16, 23, 2] - selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: 
TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 
20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 22:date) -> 23:date + selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 9:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 9:string) -> 10:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 12:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprColumnNull(col 8:boolean, col 9:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean, ConstantVectorExpression(val Early 2010s) -> 9:string) -> 11:string) -> 12:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 5:boolean, val Oldcol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 
5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val Early 2000scol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 11:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprNullNull(null, null) -> 11:string) -> 13:string) -> 11:string) -> 13:string, IfExprLongColumnLongColumn(col 5:boolean, col 6:int, col 7:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 5:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 7:int) -> 14:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 5:boolean) -> 11:string, IfExprNullColumn(col 5:boolean, null, col 6)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 5:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 6:int) -> 7:int, IfExprColumnNull(col 17:boolean, col 15:int, null)(children: ColAndCol(col 15:boolean, col 16:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 16:boolean) -> 17:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 15:int) -> 16:int, IfExprLongColumnLongColumn(col 20:boolean, col 21:date, col 22:date)(children: DoubleColGreaterDoubleScalar(col 19:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 18:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 18:double) -> 19:double) -> 20:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 21:date, VectorUDFDateAddColScalar(col 
0:date, val 365) -> 22:date) -> 23:date Statistics: Num rows: 51 Data size: 12300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) @@ -635,7 +635,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 3, 15, 26, 36, 40, 42, 44, 46, 53, 2] - selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 19:boolean, 
ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: VectorUDFAdaptor(ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: 
TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date + selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 11:boolean, col 16:stringcol 25:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 11:boolean, ConstantVectorExpression(val Old) -> 16:string, IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 24:string)(children: TimestampColLessTimestampScalar(col 
3:timestamp, val 2006-01-01 00:00:00) -> 17:boolean, ConstantVectorExpression(val Early 2000s) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 19:boolean, ConstantVectorExpression(val Late 2000s) -> 20:string, IfExprColumnNull(col 21:boolean, col 22:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 21:boolean, ConstantVectorExpression(val Early 2010s) -> 22:string) -> 23:string) -> 24:string) -> 25:string) -> 26:string, IfExprColumnCondExpr(col 27:boolean, col 28:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 27:boolean, ConstantVectorExpression(val Old) -> 28:string, IfExprColumnCondExpr(col 29:boolean, col 30:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 29:boolean, ConstantVectorExpression(val Early 2000s) -> 30:string, IfExprColumnCondExpr(col 31:boolean, col 32:stringcol 33:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 31:boolean, ConstantVectorExpression(val Late 2000s) -> 32:string, IfExprNullNull(null, null) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 41:boolean, null, col 43:int)(children: 
TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 41:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 43:int) -> 44:int, IfExprCondExprNull(col 47:boolean, col 45:int, null)(children: ColAndCol(col 45:boolean, col 46:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 45:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 46:boolean) -> 47:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 45:int) -> 46:int, IfExprCondExprCondExpr(col 50:boolean, col 51:datecol 52:date)(children: DoubleColGreaterDoubleScalar(col 49:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 48:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 48:double) -> 49:double) -> 50:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 51:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 52:date) -> 53:date Statistics: Num rows: 51 Data size: 12300 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index 666572a..0e147be 100644 --- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -940,6 +940,29 @@ {"FilterColumnBetweenDynamicValue", "date", ""}, {"FilterColumnBetweenDynamicValue", "timestamp", ""}, + {"ColumnBetween", "long", ""}, + {"ColumnBetween", "double", ""}, + {"ColumnBetween", "long", "!"}, + {"ColumnBetween", "double", "!"}, + + {"StringColumnBetween", "string", ""}, + {"StringColumnBetween", "string", "!"}, + + {"TruncStringColumnBetween", "char", ""}, + {"TruncStringColumnBetween", "char", "!"}, + + {"TruncStringColumnBetween", "varchar", ""}, 
+ {"TruncStringColumnBetween", "varchar", "!"}, + + {"TimestampColumnBetween", "timestamp", ""}, + {"TimestampColumnBetween", "timestamp", "!"}, + + {"DecimalColumnBetween", "decimal", ""}, + {"DecimalColumnBetween", "decimal", "!"}, + + {"Decimal64ColumnBetween", ""}, + {"Decimal64ColumnBetween", "!"}, + {"ColumnCompareColumn", "Equal", "long", "long", "=="}, {"ColumnCompareColumn", "Equal", "long", "double", "=="}, {"ColumnCompareColumn", "Equal", "double", "double", "=="}, @@ -1368,6 +1391,15 @@ private void generate() throws Exception { generateFilterColumnBetween(tdesc); } else if (tdesc[0].equals("FilterColumnBetweenDynamicValue")) { generateFilterColumnBetweenDynamicValue(tdesc); + } else if (tdesc[0].equals("ColumnBetween") || + tdesc[0].equals("StringColumnBetween") || + tdesc[0].equals("TimestampColumnBetween") || + tdesc[0].equals("DecimalColumnBetween")) { + generateColumnBetween(tdesc); + } else if (tdesc[0].equals("TruncStringColumnBetween")) { + generateTruncStringColumnBetween(tdesc); + } else if (tdesc[0].equals("Decimal64ColumnBetween")) { + generateDecimal64ColumnBetween(tdesc); } else if (tdesc[0].equals("ScalarArithmeticColumn") || tdesc[0].equals("ScalarDivideColumn")) { generateScalarArithmeticColumn(tdesc); } else if (tdesc[0].equals("FilterColumnCompareColumn")) { @@ -1693,6 +1725,63 @@ private void generateFilterColumnBetweenDynamicValue(String[] tdesc) throws Exce className, templateString); } + private void generateColumnBetween(String[] tdesc) throws Exception { + String operandType = tdesc[1]; + String optionalNot = tdesc[2]; + + String className = getCamelCaseType(operandType) + "Column" + + (optionalNot.equals("!") ? "Not" : "") + "Between"; + String inputColumnVectorType = getColumnVectorType(operandType); + + // Read the template into a string, expand it, and write it. 
+ File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<InputColumnVectorType>", inputColumnVectorType); + templateString = templateString.replaceAll("<OperandType>", operandType); + templateString = templateString.replaceAll("<OptionalNot>", optionalNot); + + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateTruncStringColumnBetween(String[] tdesc) throws Exception { + String operandType = tdesc[1]; + String optionalNot = tdesc[2]; + + String className = getCamelCaseType(operandType) + "Column" + + (optionalNot.equals("!") ? "Not" : "") + "Between"; + String baseClassName = "StringColumn" + + (optionalNot.equals("!") ? "Not" : "") + "Between"; + + // Read the template into a string, expand it, and write it. + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<BaseClassName>", baseClassName); + + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateDecimal64ColumnBetween(String[] tdesc) throws Exception { + String optionalNot = tdesc[1]; + + String className = "Decimal64Column" + + (optionalNot.equals("!") ? "Not" : "") + "Between"; + String baseClassName = "LongColumn" + + (optionalNot.equals("!") ? "Not" : "") + "Between"; + + // Read the template into a string, expand it, and write it. 
+ File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<BaseClassName>", baseClassName); + + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + private void generateColumnCompareColumn(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2];