diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 0a79256..48ac0d0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -560,7 +560,7 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, if (udf instanceof GenericUDFBetween) { return getBetweenFilterExpression(childExpr, mode); } else if (udf instanceof GenericUDFIn) { - return getInFilterExpression(childExpr); + return getInExpression(childExpr, mode); } else if (udf instanceof GenericUDFBridge) { VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode); if (v != null) { @@ -583,11 +583,9 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, } /** - * Create a filter expression for column IN ( ) - * @param childExpr - * @return + * Create a filter or boolean-valued expression for column IN ( list of constants ) */ - private VectorExpression getInFilterExpression(List<ExprNodeDesc> childExpr) + private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, Mode mode) throws HiveException { ExprNodeDesc colExpr = childExpr.get(0); String colType = colExpr.getTypeString(); @@ -609,48 +607,41 @@ private VectorExpression getInFilterExpression(List<ExprNodeDesc> childExpr) // determine class Class<?> cl = null; if (isIntFamily(colType)) { - cl = FilterLongColumnInList.class; + cl = (mode == Mode.FILTER ? 
FilterLongColumnInList.class : LongColumnInList.class); long[] inVals = new long[childrenForInList.size()]; for (int i = 0; i != inVals.length; i++) { inVals[i] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) childrenForInList.get(i)); } - FilterLongColumnInList f = (FilterLongColumnInList) - createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION); - f.setInListValues(inVals); - expr = f; + expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION); + ((ILongInExpr) expr).setInListValues(inVals); } else if (colType.equals("timestamp")) { - cl = FilterLongColumnInList.class; + cl = (mode == Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class); long[] inVals = new long[childrenForInList.size()]; for (int i = 0; i != inVals.length; i++) { inVals[i] = getTimestampScalar(childrenForInList.get(i)); } - FilterLongColumnInList f = (FilterLongColumnInList) - createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION); - f.setInListValues(inVals); - expr = f; + expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION); + ((ILongInExpr) expr).setInListValues(inVals); } else if (colType.equals("string")) { - cl = FilterStringColumnInList.class; + cl = (mode == Mode.FILTER ? FilterStringColumnInList.class : StringColumnInList.class); byte[][] inVals = new byte[childrenForInList.size()][]; for (int i = 0; i != inVals.length; i++) { inVals[i] = getStringScalarAsByteArray((ExprNodeConstantDesc) childrenForInList.get(i)); } - FilterStringColumnInList f =(FilterStringColumnInList) - createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION); - f.setInListValues(inVals); - expr = f; + expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION); + ((IStringInExpr) expr).setInListValues(inVals); } else if (isFloatFamily(colType)) { - cl = FilterDoubleColumnInList.class; + cl = (mode == Mode.FILTER ? 
FilterDoubleColumnInList.class : DoubleColumnInList.class); double[] inValsD = new double[childrenForInList.size()]; for (int i = 0; i != inValsD.length; i++) { inValsD[i] = getNumericScalarAsDouble(childrenForInList.get(i)); } - FilterDoubleColumnInList f = (FilterDoubleColumnInList) - createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION); - f.setInListValues(inValsD); - expr = f; - } else { - throw new HiveException("Type " + colType + " not supported for IN in vectorized mode"); - } + expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION); + ((IDoubleInExpr) expr).setInListValues(inValsD); + } + + // Return the desired VectorExpression if found. Otherwise, return null to cause + // execution to fall back to row mode. return expr; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java new file mode 100644 index 0000000..bb0696e --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java @@ -0,0 +1,168 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Output a boolean value (1 or 0 in a LongColumnVector, null where the input is null) indicating if a double column is IN a list of constants. + */ +public class DoubleColumnInList extends VectorExpression implements IDoubleInExpr { + + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + private double[] inListValues; + + // The set object containing the IN list; built lazily on the first evaluate() call. + // It is optimized for lookup of the data type of the column. 
+ private transient CuckooSetDouble inSet; + + public DoubleColumnInList(int colNum, int outputColumn) { + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public DoubleColumnInList() { + super(); + inSet = null; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + if (inSet == null) { + inSet = new CuckooSetDouble(inListValues.length); + inSet.load(inListValues); + } + + DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + double[] vector = inputColVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector.noNulls; + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size 
would be zero + // Only entry 0 is computed; the output is flagged as repeating as well. + outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero + // Only entry 0 (value or null) is computed; the output is flagged as repeating as well. + if (!nullPos[0]) { + outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; + outNulls[i] = false; + } else { + + // comparison with null is null + outNulls[i] = true; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = inSet.lookup(vector[i]) ? 
1 : 0; + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public double[] getInListValues() { + return this.inListValues; + } + + public void setInListValues(double[] a) { + this.inListValues = a; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + + // return null since this will be handled as a special case in VectorizationContext + return null; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java index f175ffc..05dcb43 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java @@ -34,7 +34,7 @@ /** * Evaluate IN filter on a batch for a vector of doubles. */ -public class FilterDoubleColumnInList extends VectorExpression { +public class FilterDoubleColumnInList extends VectorExpression implements IDoubleInExpr { private static final long serialVersionUID = 1L; private int inputCol; private double[] inListValues; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java index 6f27bda..5068b7c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java @@ -33,7 +33,8 @@ /** * Evaluate IN filter on a batch for a vector of longs. 
*/ -public class FilterLongColumnInList extends VectorExpression { +public class FilterLongColumnInList extends VectorExpression implements ILongInExpr { + private static final long serialVersionUID = 1L; private int inputCol; private long[] inListValues; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java index 8b88bf4..2434e90 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java @@ -37,7 +37,7 @@ * the inner loop, and there is a hash table implemented * with Cuckoo hashing that has fast lookup to do the IN test. */ -public class FilterStringColumnInList extends VectorExpression { +public class FilterStringColumnInList extends VectorExpression implements IStringInExpr { private static final long serialVersionUID = 1L; private int inputCol; private byte[][] inListValues; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java new file mode 100644 index 0000000..3323cac --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IDoubleInExpr.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Interface used for both filter and non-filter versions of IN over double columns, so the + * IN-list values can be set through one type regardless of the concrete expression class. + */ +public interface IDoubleInExpr { + void setInListValues(double[] inVals); +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java new file mode 100644 index 0000000..19a2d90 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ILongInExpr.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Interface used for both filter and non-filter versions of IN over long-valued columns, so the + * IN-list values can be set through one type regardless of the concrete expression class. + */ +public interface ILongInExpr { + void setInListValues(long[] inVals); +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java new file mode 100644 index 0000000..c162238 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IStringInExpr.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Interface used for both filter and non-filter versions of IN over string (byte array) columns, so the + * IN-list values can be set through one type regardless of the concrete expression class. 
+ */ +public interface IStringInExpr { + void setInListValues(byte[][] inVals); +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java new file mode 100644 index 0000000..3d41cd6 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Output a boolean value (1 or 0 in a LongColumnVector, null where the input is null) indicating if a long column is IN a list of constants. 
+ */ +public class LongColumnInList extends VectorExpression implements ILongInExpr { + + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + private long[] inListValues; + + // The set object containing the IN list. 
This is optimized for lookup + // of the data type of the column. + private transient CuckooSetLong inSet; + + public LongColumnInList(int colNum, int outputColumn) { + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public LongColumnInList() { + super(); + inSet = null; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + if (inSet == null) { + inSet = new CuckooSetLong(inListValues.length); + inSet.load(inListValues); + } + + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] vector = inputColVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector.noNulls; + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero + // Only entry 0 is computed; the output is flagged as repeating as well. + outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero + // Only entry 0 (value or null) is computed; the output is flagged as repeating as well. + if (!nullPos[0]) { + outputVector[0] = inSet.lookup(vector[0]) ? 
1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; + outNulls[i] = false; + } else { + + // comparison with null is null + outNulls[i] = true; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public long[] getInListValues() { + return this.inListValues; + } + + public void setInListValues(long [] a) { + this.inListValues = a; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + + // return null since this will be handled as a special case in VectorizationContext + return null; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java new file mode 100644 index 0000000..b1b824d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.UDFLike; +import org.apache.hadoop.io.Text; + +import java.util.Arrays; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Evaluate an IN boolean expression (not a filter) on a batch for a vector of strings, writing 1 or 0 to a LongColumnVector output column (null where the input is null). + * This is optimized so that no objects have to be created in + * the inner loop, and there is a hash table implemented + * with Cuckoo hashing that has fast lookup to do the IN test. + */ +public class StringColumnInList extends VectorExpression implements IStringInExpr { + private static final long serialVersionUID = 1L; + private int inputCol; + private int outputColumn; + private byte[][] inListValues; + + // The set object containing the IN list. This is optimized for lookup + // of the data type of the column. + private transient CuckooSetBytes inSet; + + public StringColumnInList() { + super(); + inSet = null; + } + + /** + * After construction you must call setInListValues() to add the values to the IN set. 
+ */ + public StringColumnInList(int colNum, int outputColumn) { + this.inputCol = colNum; + this.outputColumn = outputColumn; + inSet = null; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + if (inSet == null) { + inSet = new CuckooSetBytes(inListValues.length); + inSet.load(inListValues); + } + + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + byte[][] vector = inputColVector.vector; + int[] start = inputColVector.start; + int[] len = inputColVector.length; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = inputColVector.isRepeating; + outputColVector.noNulls = inputColVector.noNulls; + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + if (!nullPos[0]) { + outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0; + } + outputColVector.isNull[0] = nullPos[0]; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 
1 : 0; + } + outputColVector.isNull[i] = nullPos[i]; + } + } else { + System.arraycopy(nullPos, 0, outputColVector.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; + } + } + } + } + } + + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return this.outputColumn; + } + + public void setOutputColumn(int value) { + this.outputColumn = value; + } + + public int getInputCol() { + return inputCol; + } + + public void setInputCol(int colNum) { + this.inputCol = colNum; + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. + return null; + } + + public byte[][] getInListValues() { + return this.inListValues; + } + + public void setInListValues(byte [][] a) { + this.inListValues = a; + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index 73bcee0..a0e205d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble; @@ -38,12 +39,14 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull; +import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.NotCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.RoundWithNumDigitsDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsFalse; import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLTrim; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLower; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUpper; @@ -958,8 +961,9 @@ public void testBetweenFilters() throws HiveException { assertTrue(ve instanceof FilterLongColumnNotBetween); } + // Test translation of both IN filters (Mode.FILTER) and boolean-valued IN expressions (Mode.PROJECTION, non-filters). 
@Test - public void testInFilters() throws HiveException { + public void testInFiltersAndExprs() throws HiveException { ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false); ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc("Alpha"); ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc("Bravo"); @@ -980,6 +984,8 @@ public void testInFilters() throws HiveException { VectorizationContext vc = new VectorizationContext(columnMap, 2); VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterStringColumnInList); + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); + assertTrue(ve instanceof StringColumnInList); // long IN children1.set(0, new ExprNodeColumnDesc(Long.class, "col1", "table", false)); @@ -987,6 +993,8 @@ public void testInFilters() throws HiveException { children1.set(2, new ExprNodeConstantDesc(20)); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterLongColumnInList); + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); + assertTrue(ve instanceof LongColumnInList); // double IN children1.set(0, new ExprNodeColumnDesc(Double.class, "col1", "table", false)); @@ -994,5 +1002,7 @@ public void testInFilters() throws HiveException { children1.set(2, new ExprNodeConstantDesc(20d)); ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterDoubleColumnInList); + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); + assertTrue(ve instanceof DoubleColumnInList); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java index d603739..ba1ac59 100644 --- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java @@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.junit.Assert; @@ -460,4 +461,67 @@ public void testFilterExprAndExpr() { assertEquals(2, batch1.selected[0]); } + + @Test + public void testLongInExpr() { + + // check basic operation + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut(); + LongColumnVector outV = (LongColumnVector) b.cols[1]; + long[] inVals = new long[2]; + inVals[0] = 0; + inVals[1] = -2; + LongColumnInList expr = new LongColumnInList(0, 1); + expr.setInListValues(inVals); + expr.evaluate(b); + assertEquals(1, outV.vector[0]); + assertEquals(0, outV.vector[1]); + + // check null handling + b.cols[0].noNulls = false; + b.cols[0].isNull[0] = true; + expr.evaluate(b); + assertEquals(true, !outV.noNulls && outV.isNull[0]); + assertEquals(0, outV.vector[1]); + + // check isRepeating handling + b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut(); + outV = (LongColumnVector) b.cols[1]; + b.cols[0].isRepeating = true; + expr.evaluate(b); + assertEquals(true, outV.isRepeating); + assertEquals(1, outV.vector[0]); + } + + @Test + public void testDoubleInExpr() { + + // check basic operation + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut(); + LongColumnVector outV = (LongColumnVector) b.cols[1]; + double[] inVals = new double[2]; + inVals[0] = -1.5d; + inVals[1] = 30d; + b.size = 2; + DoubleColumnInList expr = new DoubleColumnInList(0, 1); + expr.setInListValues(inVals); + expr.evaluate(b); + assertEquals(1, 
outV.vector[0]); + assertEquals(0, outV.vector[1]); + + // check null handling + b.cols[0].noNulls = false; + b.cols[0].isNull[0] = true; + expr.evaluate(b); + assertEquals(true, !outV.noNulls && outV.isNull[0]); + assertEquals(0, outV.vector[1]); + + // check isRepeating handling + b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut(); + outV = (LongColumnVector) b.cols[1]; + b.cols[0].isRepeating = true; + expr.evaluate(b); + assertEquals(true, outV.isRepeating); + assertEquals(1, outV.vector[0]); + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java index fa4da40..40de47d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java @@ -216,4 +216,29 @@ public void testScalarLongDivide() { assertFalse(out.noNulls); assertFalse(out.isRepeating); } + + @Test + public void testBooleanValuedLongIn() { + VectorizedRowBatch batch = getBatch(); + long[] a = new long[2]; + a[0] = 20; + a[1] = 1000; + batch.size = 2; // getBatch() fills only rows 0 and 1 (values 10 and 20) + VectorExpression expr = (new LongColumnInList(0, 1)); + ((LongColumnInList) expr).setInListValues(a); + expr.evaluate(batch); + LongColumnVector out = (LongColumnVector) batch.cols[1]; + Assert.assertEquals(0, out.vector[0]); // 10 is not in {20, 1000} + Assert.assertEquals(1, out.vector[1]); // 20 is in {20, 1000} + } + + private VectorizedRowBatch getBatch() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + LongColumnVector v = new LongColumnVector(); + v.vector[0] = 10; + v.vector[1] = 20; + b.cols[0] = v; + b.cols[1] = new LongColumnVector(); + return b; + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java index 
ea34856..7a15e63 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java @@ -1830,4 +1830,43 @@ private VectorizedRowBatch makeTrimBatch() { b.size = 5; return b; } + + // Test boolean-valued (non-filter) IN expression for strings + @Test + public void testStringInExpr() { + + // test basic operation + VectorizedRowBatch b = makeStringBatch(); + b.size = 2; + b.cols[0].noNulls = true; + byte[][] inVals = new byte[2][]; + inVals[0] = red; + inVals[1] = blue; + StringColumnInList expr = new StringColumnInList(0, 2); + expr.setInListValues(inVals); + expr.evaluate(b); + LongColumnVector outV = (LongColumnVector) b.cols[2]; + Assert.assertEquals(1, outV.vector[0]); + Assert.assertEquals(0, outV.vector[1]); + + // test null input + b = makeStringBatch(); + b.size = 2; + b.cols[0].noNulls = false; + b.cols[0].isNull[0] = true; + expr.evaluate(b); + outV = (LongColumnVector) b.cols[2]; + Assert.assertEquals(true, !outV.noNulls && outV.isNull[0] && !outV.isNull[1]); + Assert.assertEquals(0, outV.vector[1]); + + // test repeating logic + b = makeStringBatch(); + b.size = 2; + b.cols[0].noNulls = true; + b.cols[0].isRepeating = true; + expr.evaluate(b); + outV = (LongColumnVector) b.cols[2]; + Assert.assertEquals(1, outV.vector[0]); + Assert.assertEquals(true, outV.isRepeating); + } }