diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java new file mode 100644 index 0000000..eda3d4b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.Serializable; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.io.LongWritable; + +/** + * Filter operator implementation. + **/ +public class VectorFilterOperator extends Operator implements + Serializable { + + private static final long serialVersionUID = 1L; + + /** + * Counter. + * + */ + public static enum Counter { + FILTERED, PASSED + } + + private final transient LongWritable filtered_count, passed_count; + private transient VectorExpression conditionEvaluator; + transient int heartbeatInterval; + private final VectorizationContext vContext; + + public VectorFilterOperator(VectorizationContext ctxt, OperatorDesc conf) { + super(); + this.vContext = ctxt; + filtered_count = new LongWritable(); + passed_count = new LongWritable(); + this.conf = (FilterDesc) conf; + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + try { + heartbeatInterval = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVESENDHEARTBEAT); + ExprNodeDesc oldExpression = conf.getPredicate(); + vContext.setOperatorType(OperatorType.FILTER); + conditionEvaluator = vContext.getVectorExpression(oldExpression); + System.out.println("Filter class name ="+conditionEvaluator.getClass().getCanonicalName()); + statsMap.put(Counter.FILTERED, filtered_count); + statsMap.put(Counter.PASSED, passed_count); + } catch (Throwable e) { + throw new HiveException(e); + } + initializeChildren(hconf); + } + + public void setFilterCondition(VectorExpression expr) { + this.conditionEvaluator = expr; + } + + @Override + public void processOp(Object row, int tag) throws HiveException { + + VectorizedRowBatch vrg = (VectorizedRowBatch) row; + //Evaluate the predicate expression + //The selected vector represents selected rows. + conditionEvaluator.evaluate(vrg); + if (vrg.size > 0) { + forward(vrg, null); + } + } + + /** + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "FIL"; + } + + @Override + public OperatorType getType() { + return OperatorType.FILTER; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java new file mode 100644 index 0000000..061db16 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.Serializable; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; + +/** + * Select operator implementation. + */ +public class VectorSelectOperator extends Operator implements + Serializable { + + private static final long serialVersionUID = 1L; + + protected transient VectorExpression[] vExpressions; + + VectorizedRowBatch output; + private final VectorizationContext vContext; + + public VectorSelectOperator(VectorizationContext ctxt, OperatorDesc conf) { + this.vContext = ctxt; + this.conf = (SelectDesc) conf; + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + // Just forward the row as is + if (conf.isSelStarNoCompute()) { + initializeChildren(hconf); + return; + } + + List colList = conf.getColList(); + vContext.setOperatorType(OperatorType.SELECT); + vExpressions = new VectorExpression[colList.size()]; + for (int i = 0; i < colList.size(); i++) { + vExpressions[i] = vContext.getVectorExpression(colList.get(i)); + } + output = new VectorizedRowBatch(colList.size(), + VectorizedRowBatch.DEFAULT_SIZE); + initializeChildren(hconf); + } + + public void setSelectExpressions(VectorExpression[] exprs) { + this.vExpressions = exprs; + output = new VectorizedRowBatch(exprs.length, VectorizedRowBatch.DEFAULT_SIZE); + } + + public VectorizedRowBatch getOutput() { + return output; + } + + @Override + public void processOp(Object row, int tag) throws HiveException { + + // Just forward the row as is + if (conf.isSelStarNoCompute()) { + forward(row, inputObjInspectors[tag]); + return; + } + + VectorizedRowBatch vrg = (VectorizedRowBatch) row; + for (int i = 0; i < vExpressions.length; i++) { + try { + vExpressions[i].evaluate(vrg); + } catch (RuntimeException e) { + throw new HiveException("Error evaluating " + + conf.getColList().get(i).getExprString(), e); + } + } + + //Prepare output, shallow vector copy + output.selectedInUse = vrg.selectedInUse; + output.selected = vrg.selected; + output.size = vrg.size; + for (int i = 0; i < vExpressions.length; i++) { + output.cols[i] = vrg.cols[vExpressions[i].getOutputColumn()]; + } + output.numCols = vExpressions.length; + forward(output, outputObjInspector); + } + + /** + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "SEL"; + } + + @Override + public OperatorType getType() { + return OperatorType.SELECT; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java new file mode 100644 index 0000000..71386aa --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -0,0 +1,810 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterNotExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsFalse; +import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull; +import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull; +import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.udf.UDFOPDivide; +import org.apache.hadoop.hive.ql.udf.UDFOPMinus; +import org.apache.hadoop.hive.ql.udf.UDFOPMod; +import org.apache.hadoop.hive.ql.udf.UDFOPMultiply; +import org.apache.hadoop.hive.ql.udf.UDFOPNegative; +import org.apache.hadoop.hive.ql.udf.UDFOPPlus; +import org.apache.hadoop.hive.ql.udf.UDFOPPositive; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +public class VectorizationContext { + + private static final Log LOG = LogFactory.getLog( + VectorizationContext.class.getName()); + + //columnName to column position map + private final Map columnMap; + //Next column to be used for intermediate output + private int nextOutputColumn; + private OperatorType opType; + //Map column number to type + private final Map outputColumnTypes; + + public VectorizationContext(Map columnMap, + int initialOutputCol) { + this.columnMap = columnMap; + this.nextOutputColumn = initialOutputCol; + this.outputColumnTypes = new HashMap(); + } + + public int allocateOutputColumn (String columnName, String columnType) { + int newColumnIndex = nextOutputColumn++; + columnMap.put(columnName, newColumnIndex); + outputColumnTypes.put(newColumnIndex, columnType); + return newColumnIndex; + } + + public void setOperatorType(OperatorType opType) { + this.opType = opType; + } + + private VectorExpression getVectorExpression(ExprNodeColumnDesc + exprDesc) { + + int columnNum = columnMap.get(exprDesc.getColumn()); + VectorExpression expr = null; + switch (opType) { + case FILTER: + //Important: It will come here only if the column is being used as a boolean + expr = new SelectColumnIsTrue(columnNum); + break; + case SELECT: + case GROUPBY: + case REDUCESINK: + expr = new IdentityExpression(columnNum, exprDesc.getTypeString()); + break; + } + return expr; + } + + public VectorExpression[] getVectorExpressions(List exprNodes) { + int i = 0; + VectorExpression[] ret = new VectorExpression[exprNodes.size()]; + for (ExprNodeDesc e : exprNodes) { + ret[i++] = getVectorExpression(e); + } + return ret; + } + + public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) { + if (exprDesc instanceof ExprNodeColumnDesc) { + return getVectorExpression((ExprNodeColumnDesc) exprDesc); + } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; + return getVectorExpression(expr.getGenericUDF(), + expr.getChildExprs()); + } + return null; + } + + public VectorExpression getUnaryMinusExpression(List childExprList) { + ExprNodeDesc childExpr = childExprList.get(0); + int inputCol; + String colType; + VectorExpression v1 = null; + int outputCol = this.nextOutputColumn++; + if (childExpr instanceof ExprNodeGenericFuncDesc) { + v1 = getVectorExpression(childExpr); + inputCol = v1.getOutputColumn(); + colType = v1.getOutputType(); + } else if (childExpr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr; + inputCol = columnMap.get(colDesc.getColumn()); + colType = colDesc.getTypeString(); + } else { + throw new RuntimeException("Expression not supported: "+childExpr); + } + String className = getNormalizedTypeName(colType) + "colUnaryMinus"; + this.nextOutputColumn = outputCol+1; + VectorExpression expr; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol, outputCol); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + if (v1 != null) { + expr.setChildExpressions(new VectorExpression [] {v1}); + } + return expr; + } + + public VectorExpression getUnaryPlusExpression(List childExprList) { + ExprNodeDesc childExpr = childExprList.get(0); + int inputCol; + String colType; + VectorExpression v1 = null; + if (childExpr instanceof ExprNodeGenericFuncDesc) { + v1 = getVectorExpression(childExpr); + inputCol = v1.getOutputColumn(); + colType = v1.getOutputType(); + } else if (childExpr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr; + inputCol = columnMap.get(colDesc.getColumn()); + colType = colDesc.getTypeString(); + } else { + throw new RuntimeException("Expression not supported: "+childExpr); + } + VectorExpression expr = new IdentityExpression(inputCol, colType); + if (v1 != null) { + expr.setChildExpressions(new VectorExpression [] {v1}); + } + return expr; + } + + private VectorExpression getVectorExpression(GenericUDF udf, + List childExpr) { + if (udf instanceof GenericUDFOPLessThan) { + return getVectorBinaryComparisonFilterExpression("Less", childExpr); + } else if (udf instanceof GenericUDFOPEqualOrLessThan) { + return getVectorBinaryComparisonFilterExpression("LessEqual", childExpr); + } else if (udf instanceof GenericUDFOPEqual) { + return getVectorBinaryComparisonFilterExpression("Equal", childExpr); + } else if (udf instanceof GenericUDFOPGreaterThan) { + return getVectorBinaryComparisonFilterExpression("Greater", childExpr); + } else if (udf instanceof GenericUDFOPEqualOrGreaterThan) { + return getVectorBinaryComparisonFilterExpression("GreaterEqual", childExpr); + } else if (udf instanceof GenericUDFOPNotEqual) { + return getVectorBinaryComparisonFilterExpression("NotEqual", childExpr); + } else if (udf instanceof GenericUDFOPNotNull) { + return getVectorExpression((GenericUDFOPNotNull) udf, childExpr); + } else if (udf instanceof GenericUDFOPNull) { + return getVectorExpression((GenericUDFOPNull) udf, childExpr); + } else if (udf instanceof GenericUDFOPAnd) { + return getVectorExpression((GenericUDFOPAnd) udf, childExpr); + } else if (udf instanceof GenericUDFOPNot) { + return getVectorExpression((GenericUDFOPNot) udf, childExpr); + } else if (udf instanceof GenericUDFOPOr) { + return getVectorExpression((GenericUDFOPOr) udf, childExpr); + } else if (udf instanceof GenericUDFBridge) { + return getVectorExpression((GenericUDFBridge) udf, childExpr); + } + return null; + } + + private VectorExpression getVectorExpression(GenericUDFBridge udf, + List childExpr) { + Class cl = udf.getUdfClass(); + // (UDFBaseNumericOp.class.isAssignableFrom(cl)) == true + if (cl.equals(UDFOPPlus.class)) { + return getBinaryArithmeticExpression("Add", childExpr); + } else if (cl.equals(UDFOPMinus.class)) { + return getBinaryArithmeticExpression("Subtract", childExpr); + } else if (cl.equals(UDFOPMultiply.class)) { + return getBinaryArithmeticExpression("Multiply", childExpr); + } else if (cl.equals(UDFOPDivide.class)) { + return getBinaryArithmeticExpression("Divide", childExpr); + } else if (cl.equals(UDFOPMod.class)) { + return getBinaryArithmeticExpression("Modulo", childExpr); + } else if (cl.equals(UDFOPNegative.class)) { + return getUnaryMinusExpression(childExpr); + } else if (cl.equals(UDFOPPositive.class)) { + return getUnaryPlusExpression(childExpr); + } + return null; + } + + private VectorExpression getBinaryArithmeticExpression(String method, + List childExpr) { + ExprNodeDesc leftExpr = childExpr.get(0); + ExprNodeDesc rightExpr = childExpr.get(1); + + VectorExpression expr = null; + if ( (leftExpr instanceof ExprNodeColumnDesc) && + (rightExpr instanceof ExprNodeConstantDesc) ) { + ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr; + ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr; + int inputCol = columnMap.get(leftColDesc.getColumn()); + String colType = leftColDesc.getTypeString(); + String scalarType = constDesc.getTypeString(); + String className = getBinaryColumnScalarExpressionClassName(colType, + scalarType, method); + int outputCol = this.nextOutputColumn++; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol, + getScalarValue(constDesc), outputCol); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + } else if ( (rightExpr instanceof ExprNodeColumnDesc) && + (leftExpr instanceof ExprNodeConstantDesc) ) { + ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr; + ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr; + int inputCol = columnMap.get(rightColDesc.getColumn()); + String colType = rightColDesc.getTypeString(); + String scalarType = constDesc.getTypeString(); + String className = getBinaryColumnScalarExpressionClassName(colType, + scalarType, method); + int outputCol = this.nextOutputColumn++; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol, + getScalarValue(constDesc), outputCol); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + } else if ( (rightExpr instanceof ExprNodeColumnDesc) && + (leftExpr instanceof ExprNodeColumnDesc) ) { + ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr; + ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr; + int inputCol1 = columnMap.get(rightColDesc.getColumn()); + int inputCol2 = columnMap.get(leftColDesc.getColumn()); + String colType1 = rightColDesc.getTypeString(); + String colType2 = leftColDesc.getTypeString(); + String className = getBinaryColumnColumnExpressionClassName(colType1, + colType2, method); + int outputCol = this.nextOutputColumn++; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2, + outputCol); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + } else if ((leftExpr instanceof ExprNodeGenericFuncDesc) + && (rightExpr instanceof ExprNodeColumnDesc)) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr; + int outputCol = this.nextOutputColumn++; + VectorExpression v1 = getVectorExpression(leftExpr); + int inputCol1 = v1.getOutputColumn(); + int inputCol2 = columnMap.get(colDesc.getColumn()); + String colType1 = v1.getOutputType(); + String colType2 = colDesc.getTypeString(); + String className = getBinaryColumnColumnExpressionClassName(colType1, + colType2, method); + this.nextOutputColumn = outputCol+1; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2, + outputCol); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + expr.setChildExpressions(new VectorExpression [] {v1}); + } else if ((leftExpr instanceof ExprNodeGenericFuncDesc) + && (rightExpr instanceof ExprNodeConstantDesc)) { + ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr; + int outputCol = this.nextOutputColumn++; + VectorExpression v1 = getVectorExpression(leftExpr); + int inputCol1 = v1.getOutputColumn(); + String colType1 = v1.getOutputType(); + String scalarType = constDesc.getTypeString(); + String className = getBinaryColumnScalarExpressionClassName(colType1, + scalarType, method); + this.nextOutputColumn = outputCol+1; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol1, + getScalarValue(constDesc), outputCol); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + expr.setChildExpressions(new VectorExpression [] {v1}); + } else if ((leftExpr instanceof ExprNodeColumnDesc) + && (rightExpr instanceof ExprNodeGenericFuncDesc)) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr; + int outputCol = this.nextOutputColumn++; + VectorExpression v2 = getVectorExpression(rightExpr); + int inputCol1 = columnMap.get(colDesc.getColumn()); + int inputCol2 = v2.getOutputColumn(); + String colType1 = colDesc.getTypeString(); + String colType2 = v2.getOutputType(); + String className = getBinaryColumnColumnExpressionClassName(colType1, + colType2, method); + this.nextOutputColumn = outputCol+1; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2, + outputCol); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + expr.setChildExpressions(new VectorExpression [] {v2}); + } else if ((leftExpr instanceof ExprNodeConstantDesc) + && (rightExpr instanceof ExprNodeGenericFuncDesc)) { + ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr; + int outputCol = this.nextOutputColumn++; + VectorExpression v2 = getVectorExpression(rightExpr); + int inputCol2 = v2.getOutputColumn(); + String colType2 = v2.getOutputType(); + String scalarType = constDesc.getTypeString(); + String className = getBinaryScalarColumnExpressionClassName(colType2, + scalarType, method); + this.nextOutputColumn = outputCol+1; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol2, + getScalarValue(constDesc), outputCol); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + expr.setChildExpressions(new VectorExpression [] {v2}); + } else if ((leftExpr instanceof ExprNodeGenericFuncDesc) + && (rightExpr instanceof ExprNodeGenericFuncDesc)) { + //For arithmetic expression, the child expressions must be materializing + //columns + int outputCol = this.nextOutputColumn++; + VectorExpression v1 = getVectorExpression(leftExpr); + VectorExpression v2 = getVectorExpression(rightExpr); + int inputCol1 = v1.getOutputColumn(); + int inputCol2 = v2.getOutputColumn(); + String colType1 = v1.getOutputType(); + String colType2 = v2.getOutputType(); + String className = getBinaryColumnColumnExpressionClassName(colType1, + colType2, method); + //Reclaim the output columns + this.nextOutputColumn = outputCol+1; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2, + outputCol); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + expr.setChildExpressions(new VectorExpression [] {v1, v2}); + } + return expr; + + } + + private VectorExpression getVectorExpression(GenericUDFOPOr udf, + List childExpr) { + ExprNodeDesc leftExpr = childExpr.get(0); + ExprNodeDesc rightExpr = childExpr.get(1); + + VectorExpression ve1; + VectorExpression ve2; + if (leftExpr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr; + int inputCol = columnMap.get(colDesc.getColumn()); + ve1 = new SelectColumnIsTrue(inputCol); + } else { + ve1 = getVectorExpression(leftExpr); + } + + if (rightExpr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr; + int inputCol = columnMap.get(colDesc.getColumn()); + ve2 = new SelectColumnIsTrue(inputCol); + } else { + ve2 = getVectorExpression(leftExpr); + } + + return new FilterExprOrExpr(ve1,ve2); + } + + private VectorExpression getVectorExpression(GenericUDFOPNot udf, + List childExpr) { + ExprNodeDesc expr = childExpr.get(0); + if (expr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) expr; + int inputCol = columnMap.get(colDesc.getColumn()); + VectorExpression ve = new SelectColumnIsFalse(inputCol); + return ve; + } else { + VectorExpression ve = getVectorExpression(expr); + new FilterNotExpr(ve); + } + return null; + } + + private VectorExpression getVectorExpression(GenericUDFOPAnd udf, + List childExpr) { + ExprNodeDesc leftExpr = childExpr.get(0); + ExprNodeDesc rightExpr = childExpr.get(1); + + VectorExpression ve1; + VectorExpression ve2; + if (leftExpr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr; + int inputCol = columnMap.get(colDesc.getColumn()); + ve1 = new SelectColumnIsTrue(inputCol); + } else { + ve1 = getVectorExpression(leftExpr); + } + + if (rightExpr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr; + int inputCol = columnMap.get(colDesc.getColumn()); + ve2 = new SelectColumnIsTrue(inputCol); + } else { + ve2 = getVectorExpression(leftExpr); + } + + return new FilterExprAndExpr(ve1,ve2); + } + + private VectorExpression getVectorExpression(GenericUDFOPNull udf, + List childExpr) { + ExprNodeDesc expr = childExpr.get(0); + VectorExpression ve = null; + if (expr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) expr; + int inputCol = columnMap.get(colDesc.getColumn()); + ve = new SelectColumnIsNull(inputCol); + } else { + //TODO + } + return ve; + } + + private VectorExpression getVectorExpression(GenericUDFOPNotNull udf, + List childExpr) { + ExprNodeDesc expr = childExpr.get(0); + if (expr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) expr; + int inputCol = columnMap.get(colDesc.getColumn()); + VectorExpression ve = new SelectColumnIsNotNull(inputCol); + return ve; + } else { + //TODO + } + return null; + } + + private Object getScalarValue(ExprNodeConstantDesc constDesc) { + if (constDesc.getTypeString().equalsIgnoreCase("String")) { + return ((String) constDesc.getValue()).getBytes(); + } else { + return constDesc.getValue(); + } + } + + private VectorExpression getVectorBinaryComparisonFilterExpression(String + opName, List childExpr) { + + ExprNodeDesc leftExpr = childExpr.get(0); + ExprNodeDesc rightExpr = childExpr.get(1); + + VectorExpression expr = null; + if ( (leftExpr instanceof ExprNodeColumnDesc) && + (rightExpr instanceof ExprNodeConstantDesc) ) { + ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr; + ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr; + int inputCol = columnMap.get(leftColDesc.getColumn()); + String colType = leftColDesc.getTypeString(); + String scalarType = constDesc.getTypeString(); + String className = getFilterColumnScalarExpressionClassName(colType, + scalarType, opName); + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol, + getScalarValue(constDesc)); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + } else if ( (rightExpr instanceof ExprNodeColumnDesc) && + (leftExpr instanceof ExprNodeConstantDesc) ) { + ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr; + ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr; + int inputCol = columnMap.get(rightColDesc.getColumn()); + String colType = rightColDesc.getTypeString(); + String scalarType = constDesc.getTypeString(); + String className = getFilterColumnScalarExpressionClassName(colType, + scalarType, opName); + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol, + getScalarValue(constDesc)); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + } else if ( (rightExpr instanceof ExprNodeColumnDesc) && + (leftExpr instanceof ExprNodeColumnDesc) ) { + ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr; + ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr; + int inputCol1 = columnMap.get(rightColDesc.getColumn()); + int inputCol2 = columnMap.get(leftColDesc.getColumn()); + String colType1 = rightColDesc.getTypeString(); + String colType2 = leftColDesc.getTypeString(); + String className = getFilterColumnColumnExpressionClassName(colType1, + colType2, opName); + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + } else if ( (leftExpr instanceof ExprNodeGenericFuncDesc) && + (rightExpr instanceof ExprNodeColumnDesc) ) { + VectorExpression v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr); + ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr; + int inputCol1 = v1.getOutputColumn(); + int inputCol2 = columnMap.get(leftColDesc.getColumn()); + String colType1 = v1.getOutputType(); + String colType2 = leftColDesc.getTypeString(); + String className = getFilterColumnColumnExpressionClassName(colType1, + colType2, opName); + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + expr.setChildExpressions(new VectorExpression [] {v1}); + } else if ( (leftExpr instanceof ExprNodeColumnDesc) && + (rightExpr instanceof ExprNodeGenericFuncDesc) ) { + ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) leftExpr; + VectorExpression v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr); + int inputCol1 = columnMap.get(rightColDesc.getColumn()); + int inputCol2 = v2.getOutputColumn(); + String colType1 = rightColDesc.getTypeString(); + String colType2 = v2.getOutputType(); + String className = getFilterColumnColumnExpressionClassName(colType1, + colType2, opName); + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + expr.setChildExpressions(new VectorExpression [] {v2}); + } else if ( (leftExpr instanceof ExprNodeGenericFuncDesc) && + (rightExpr instanceof ExprNodeConstantDesc) ) { + VectorExpression v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr); + ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr; + int inputCol1 = v1.getOutputColumn(); + String colType1 = v1.getOutputType(); + String scalarType = constDesc.getTypeString(); + String className = getFilterColumnScalarExpressionClassName(colType1, + scalarType, opName); + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol1, + getScalarValue(constDesc)); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + expr.setChildExpressions(new VectorExpression [] {v1}); + } else if ( (leftExpr instanceof ExprNodeConstantDesc) && + (rightExpr instanceof ExprNodeGenericFuncDesc) ) { + ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr; + VectorExpression v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr); + int inputCol2 = v2.getOutputColumn(); + String scalarType = constDesc.getTypeString(); + String colType = v2.getOutputType(); + String className = getFilterScalarColumnExpressionClassName(colType, + scalarType, opName); + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol2, + getScalarValue(constDesc)); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + expr.setChildExpressions(new VectorExpression [] {v2}); + } else { + //For comparison expression, the child expressions must be materializing + //columns + VectorExpression v1 = getVectorExpression(leftExpr); + VectorExpression v2 = getVectorExpression(rightExpr); + int inputCol1 = v1.getOutputColumn(); + int inputCol2 = v2.getOutputColumn(); + String colType1 = v1.getOutputType(); + String colType2 = v2.getOutputType(); + String className = getFilterColumnColumnExpressionClassName(colType1, + colType2, opName); + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2); + } catch (Exception ex) { + throw new RuntimeException((ex)); + } + expr.setChildExpressions(new VectorExpression [] {v1, v2}); + } + return expr; + } + + private String getNormalizedTypeName(String colType) { + String normalizedType = null; + if (colType.equalsIgnoreCase("Double") || colType.equalsIgnoreCase("Float")) { + normalizedType = "Double"; + } else if (colType.equalsIgnoreCase("String")) { + normalizedType = "String"; + } else { + normalizedType = "Long"; + } + return normalizedType; + } + + private String getFilterColumnColumnExpressionClassName(String colType1, + String colType2, String opName) { + StringBuilder b = new StringBuilder(); + b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen."); + if (opType.equals(OperatorType.FILTER)) { + b.append("Filter"); + } + b.append(getNormalizedTypeName(colType1)); + b.append("Col"); + b.append(opName); + b.append(getNormalizedTypeName(colType2)); + b.append("Column"); + return b.toString(); + } + + private String getFilterColumnScalarExpressionClassName(String colType, String + scalarType, String opName) { + StringBuilder b = new StringBuilder(); + b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen."); + if (opType.equals(OperatorType.FILTER)) { + b.append("Filter"); + } + b.append(getNormalizedTypeName(colType)); + b.append("Col"); + b.append(opName); + b.append(getNormalizedTypeName(scalarType)); + b.append("Scalar"); + return b.toString(); + } + + private String getFilterScalarColumnExpressionClassName(String colType, String + scalarType, String opName) { + StringBuilder b = new StringBuilder(); + b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen."); + if (opType.equals(OperatorType.FILTER)) { + b.append("Filter"); + } + b.append(getNormalizedTypeName(scalarType)); + b.append("Scalar"); + b.append(opName); + b.append(getNormalizedTypeName(colType)); + b.append("Column"); + return b.toString(); + } + + private String getBinaryColumnScalarExpressionClassName(String colType, + String scalarType, String method) { + StringBuilder b = new StringBuilder(); + b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen."); + b.append(getNormalizedTypeName(colType)); + b.append("Col"); + b.append(method); + b.append(getNormalizedTypeName(scalarType)); + b.append("Scalar"); + return b.toString(); + } + + private String getBinaryScalarColumnExpressionClassName(String colType, + String scalarType, String method) { + StringBuilder b = new StringBuilder(); + b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen."); + b.append(this.getNormalizedTypeName(scalarType)); + b.append("Scalar"); + b.append(method); + b.append(this.getNormalizedTypeName(colType)); + b.append("Column"); + return b.toString(); + } + + private String getBinaryColumnColumnExpressionClassName(String colType1, + String colType2, String method) { + StringBuilder b = new StringBuilder(); + b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen."); + b.append(getNormalizedTypeName(colType1)); + b.append("Col"); + b.append(method); + b.append(getNormalizedTypeName(colType2)); + b.append("Column"); + return b.toString(); + } + + static Object[][] columnTypes = { + {"Double", DoubleColumnVector.class}, + {"Long", LongColumnVector.class}, + {"String", BytesColumnVector.class}, + }; + + public VectorizedRowBatch allocateRowBatch(int rowCount) throws HiveException { + VectorizedRowBatch ret = new VectorizedRowBatch(nextOutputColumn, rowCount); + for (int i=0; i < nextOutputColumn; ++i) { + if (false == outputColumnTypes.containsKey(i)) { + continue; + } + String columnTypeName = outputColumnTypes.get(i); + for (Object[] columnType: columnTypes) { + if (columnTypeName.equalsIgnoreCase((String)columnType[0])) { + Class columnTypeClass = (Class)columnType[1]; + try { + Constructor ctor = columnTypeClass.getConstructor(int.class); + ret.cols[i] = ctor.newInstance(rowCount); + } + catch(Exception e) { + throw new HiveException ( + String.format( + "Internal exception occured trying to allocate a vectorized column %d of type %s", + i, columnTypeName), + e); + } + } + } + } + return ret; + } + + Object[][] mapObjectInspectors = { + {"double", PrimitiveObjectInspectorFactory.writableDoubleObjectInspector}, + {"long", PrimitiveObjectInspectorFactory.writableLongObjectInspector}, + }; + + public ObjectInspector getVectorRowObjectInspector(List columnNames) throws HiveException { + List oids = new ArrayList(); + for(String columnName: columnNames) { + int columnIndex = columnMap.get(columnName); + String outputType = outputColumnTypes.get(columnIndex); + ObjectInspector oi = null; + for(Object[] moi: mapObjectInspectors) { + if (outputType.equalsIgnoreCase((String) moi[0])) { + oi = (ObjectInspector) moi[1]; + break; + } + } + if (oi == null) { + throw new HiveException(String.format("Unsuported type: %s for column %d:%s", + outputType, columnIndex, columnName)); + } + oids.add(oi); + } + + return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, oids); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java new file mode 100644 index 0000000..3ad6c7f --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java @@ -0,0 +1,211 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import junit.framework.Assert; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongScalar; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +public class TestVectorFilterOperator { + + public static class FakeDataReader { + int size; + VectorizedRowBatch vrg; + int currentSize = 0; + private final int numCols; + private final int len = 1024; + + public FakeDataReader(int size, int numCols) { + this.size = size; + this.numCols = numCols; + vrg = new VectorizedRowBatch(numCols, len); + for (int i =0; i < numCols; i++) { + try { + Thread.sleep(2); + } catch (InterruptedException e) { + + } + vrg.cols[i] = getLongVector(len); + } + } + + public VectorizedRowBatch getNext() { + if (currentSize >= size) { + vrg.size = 0; + return vrg; + } else { + vrg.size = len; + currentSize += vrg.size; + vrg.selectedInUse = false; + return vrg; + } + } + + private LongColumnVector getLongVector(int len) { + LongColumnVector lcv = new LongColumnVector(len); + TestVectorizedRowBatch.setRandomLongCol(lcv); + return lcv; + } + } + + @Test + public void testBasicFilterOperator() throws HiveException { + VectorFilterOperator vfo = new VectorFilterOperator(null, null); + VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1); + VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0); + VectorExpression ve3 = new FilterExprAndExpr(ve1,ve2); + vfo.setFilterCondition(ve3); + + FakeDataReader fdr = new FakeDataReader(1024*1, 3); + + VectorizedRowBatch vrg = fdr.getNext(); + + vfo.processOp(vrg, 0); + + //Verify + int rows = 0; + for (int i =0; i < 1024; i++){ + LongColumnVector l1 = (LongColumnVector) vrg.cols[0]; + LongColumnVector l2 = (LongColumnVector) vrg.cols[1]; + LongColumnVector l3 = (LongColumnVector) vrg.cols[2]; + if ((l1.vector[i] > l2.vector[i]) && (l3.vector[i] == 0)) { + rows ++; + } + } + Assert.assertEquals(rows, vrg.size); + } + + @Test + public void testBasicFilterLargeData() throws HiveException { + VectorFilterOperator vfo = new VectorFilterOperator(null, null); + VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1); + VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0); + VectorExpression ve3 = new FilterExprAndExpr(ve1,ve2); + vfo.setFilterCondition(ve3); + + FakeDataReader fdr = new FakeDataReader(16*1024*1024, 3); + + long startTime = System.currentTimeMillis(); + VectorizedRowBatch vrg = fdr.getNext(); + + while (vrg.size > 0) { + vfo.processOp(vrg, 0); + vrg = fdr.getNext(); + } + long endTime = System.currentTimeMillis(); + System.out.println("testBaseFilterOperator Op Time = "+(endTime-startTime)); + + //Base time + + fdr = new FakeDataReader(16*1024*1024, 3); + + long startTime1 = System.currentTimeMillis(); + vrg = fdr.getNext(); + LongColumnVector l1 = (LongColumnVector) vrg.cols[0]; + LongColumnVector l2 = (LongColumnVector) vrg.cols[1]; + LongColumnVector l3 = (LongColumnVector) vrg.cols[2]; + int rows = 0; + for (int j =0; j < 16 *1024; j++) { + for (int i = 0; i < l1.vector.length && i < l2.vector.length && i < l3.vector.length; i++) { + if ((l1.vector[i] > l2.vector[i]) && (l3.vector[i] == 0)) { + rows++; + } + } + } + long endTime1 = System.currentTimeMillis(); + System.out.println("testBaseFilterOperator base Op Time = "+(endTime1-startTime1)); + + } + + static VectorizedRowBatch getSimpleLongBatch() + { + VectorizedRowBatch batch = new VectorizedRowBatch(1); + LongColumnVector lcv = new LongColumnVector(); + batch.cols[0] = lcv; + long[] v = lcv.vector; + + v[0] = 0; + v[1] = 1; + v[2] = 2; + v[3] = 3; + batch.size = 4; + + return batch; + } + + @Test + public void testColOpScalarNumericFilterNullAndRepeatingLogic() + { + // No nulls, not repeating + FilterLongColGreaterLongScalar f = new FilterLongColGreaterLongScalar(0, 1); // col > 1 + VectorizedRowBatch batch = getSimpleLongBatch(); + batch.cols[0].noNulls = true; + batch.cols[0].isRepeating = false; + f.evaluate(batch); + // only last 2 rows qualify + Assert.assertEquals(2, batch.size); + // show that their positions are recorded + Assert.assertTrue(batch.selectedInUse); + Assert.assertEquals(2, batch.selected[0]); + Assert.assertEquals(3, batch.selected[1]); + + // make everything qualify and ensure selected is not in use + f = new FilterLongColGreaterLongScalar(0, -1); // col > -1 + batch = getSimpleLongBatch(); + f.evaluate(batch); + Assert.assertFalse(batch.selectedInUse); + Assert.assertEquals(4, batch.size); + + // has nulls, not repeating + batch = getSimpleLongBatch(); + f = new FilterLongColGreaterLongScalar(0, 1); // col > 1 + batch.cols[0].noNulls = false; + batch.cols[0].isRepeating = false; + batch.cols[0].isNull[3] = true; + f.evaluate(batch); + Assert.assertTrue(batch.selectedInUse); + Assert.assertEquals(1, batch.size); + Assert.assertEquals(2, batch.selected[0]); + + // no nulls, is repeating + batch = getSimpleLongBatch(); + f = new FilterLongColGreaterLongScalar(0, -1); // col > -1 + batch.cols[0].noNulls = true; + batch.cols[0].isRepeating = true; + f.evaluate(batch); + Assert.assertFalse(batch.selectedInUse); + Assert.assertEquals(4, batch.size); // everything qualifies (4 rows, all with value -1) + + // has nulls, is repeating + batch = getSimpleLongBatch(); + batch.cols[0].noNulls = false; + batch.cols[0].isRepeating = true; + batch.cols[0].isNull[0] = true; + f.evaluate(batch); + Assert.assertEquals(0, batch.size); // all values are null so none qualify + } + +} + diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java new file mode 100644 index 0000000..0011f8e --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorizedRowGroupGenUtil; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.junit.Test; + +public class TestVectorSelectOperator { + + @Test + public void testSelectOperator() throws HiveException { + VectorSelectOperator vso = new VectorSelectOperator(null, new SelectDesc(false)); + + VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch( + VectorizedRowBatch.DEFAULT_SIZE, 4, 17); + + LongColAddLongColumn lcalcExpr = new LongColAddLongColumn(0,1,2); + IdentityExpression iexpr = new IdentityExpression(3, "long"); + + VectorExpression [] ves = new VectorExpression [] { lcalcExpr, iexpr }; + + vso.setSelectExpressions(ves); + + vso.processOp(vrg, 0); + + VectorizedRowBatch output = vso.getOutput(); + + assertEquals(2, output.numCols); + + LongColumnVector out0 = (LongColumnVector) output.cols[0]; + LongColumnVector out1 = (LongColumnVector) output.cols[1]; + + LongColumnVector in0 = (LongColumnVector) vrg.cols[0]; + LongColumnVector in1 = (LongColumnVector) vrg.cols[1]; + LongColumnVector in2 = (LongColumnVector) vrg.cols[2]; + LongColumnVector in3 = (LongColumnVector) vrg.cols[3]; + + for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i ++) { + assertEquals(in0.vector[i]+in1.vector[i], out0.vector[i]); + assertEquals(in2.vector[i], out0.vector[i]); + assertEquals(in3.vector[i], out1.vector[i]); + } + } + +}