diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java index e43d892..0c38052 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java @@ -285,7 +285,7 @@ public int execute(DriverContext driverContext) { if (vectorPath) { if (validateVectorPath()) { - LOG.debug("Going down the vectorization path"); + LOG.info("Going down the vectorization path"); job.setMapperClass(VectorExecMapper.class); } else { //fall back to non-vector mode diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 1d3db58..9179efd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -611,7 +611,7 @@ private VectorExpression getVectorExpression(GenericUDFOPOr udf, int inputCol = getInputColumnIndex(colDesc.getColumn()); ve2 = new SelectColumnIsTrue(inputCol); } else { - ve2 = getVectorExpression(leftExpr); + ve2 = getVectorExpression(rightExpr); } return new FilterExprOrExpr(ve1,ve2); @@ -633,6 +633,7 @@ private VectorExpression getVectorExpression(GenericUDFOPNot udf, private VectorExpression getVectorExpression(GenericUDFOPAnd udf, List childExpr) throws HiveException { + ExprNodeDesc leftExpr = childExpr.get(0); ExprNodeDesc rightExpr = childExpr.get(1); @@ -651,7 +652,7 @@ private VectorExpression getVectorExpression(GenericUDFOPAnd udf, int inputCol = getInputColumnIndex(colDesc.getColumn()); ve2 = new SelectColumnIsTrue(inputCol); } else { - ve2 = getVectorExpression(leftExpr); + ve2 = getVectorExpression(rightExpr); } return new FilterExprAndExpr(ve1,ve2); @@ -965,7 +966,8 @@ private String getBinaryColumnColumnExpressionClassName(String colType1, private String getOutputColType(String inputType1, String inputType2, String method) { if (method.equalsIgnoreCase("divide") || inputType1.equalsIgnoreCase("double") || - inputType2.equalsIgnoreCase("double")) { + inputType2.equalsIgnoreCase("double") || inputType1.equalsIgnoreCase("float") || + inputType2.equalsIgnoreCase("float")) { return "double"; } else { if (inputType1.equalsIgnoreCase("string") || inputType2.equalsIgnoreCase("string")) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java index b3f8ec1..cdef233 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java @@ -24,18 +24,17 @@ * This class represents a non leaf binary operator in the expression tree. */ public class FilterExprAndExpr extends VectorExpression { - VectorExpression childExpr1; - VectorExpression childExpr2; public FilterExprAndExpr(VectorExpression childExpr1, VectorExpression childExpr2) { - this.childExpr1 = childExpr1; - this.childExpr2 = childExpr2; + this.childExpressions = new VectorExpression[2]; + childExpressions[0] = childExpr1; + childExpressions[1] = childExpr2; } @Override public void evaluate(VectorizedRowBatch batch) { - childExpr1.evaluate(batch); - childExpr2.evaluate(batch); + childExpressions[0].evaluate(batch); + childExpressions[1].evaluate(batch); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java index 3595654..98ad0ee 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java @@ -24,15 +24,14 @@ * This class represents an Or expression. This applies short circuit optimization. */ public class FilterExprOrExpr extends VectorExpression { - private final VectorExpression childExpr1; - private final VectorExpression childExpr2; private final int[] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; private int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE]; private final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE]; public FilterExprOrExpr(VectorExpression childExpr1, VectorExpression childExpr2) { - this.childExpr1 = childExpr1; - this.childExpr2 = childExpr2; + this.childExpressions = new VectorExpression[2]; + childExpressions[0] = childExpr1; + childExpressions[1] = childExpr2; } @Override @@ -41,6 +40,10 @@ public void evaluate(VectorizedRowBatch batch) { if (n <= 0) { return; } + + VectorExpression childExpr1 = this.childExpressions[0]; + VectorExpression childExpr2 = this.childExpressions[1]; + boolean prevSelectInUse = batch.selectedInUse; // Save the original selected vector diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index 5f54712..246423c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -8,7 +8,10 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColLessDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterStringScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn; @@ -26,7 +29,10 @@ import org.apache.hadoop.hive.ql.udf.UDFOPMultiply; import org.apache.hadoop.hive.ql.udf.UDFOPPlus; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.junit.Test; public class TestVectorizationContext { @@ -146,6 +152,93 @@ public void testStringFilterExpressions() throws HiveException { } @Test + public void testFloatInExpressions() throws HiveException { + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Float.class, "col1", "table", false); + ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10)); + + GenericUDFBridge udf = new GenericUDFBridge("+", false, UDFOPPlus.class); + ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(); + exprDesc.setGenericUDF(udf); + + List children1 = new ArrayList(2); + children1.add(col1Expr); + children1.add(constDesc); + exprDesc.setChildExprs(children1); + + Map columnMap = new HashMap(); + columnMap.put("col1", 0); + + VectorizationContext vc = new VectorizationContext(columnMap, 2); + vc.setOperatorType(OperatorType.SELECT); + + VectorExpression ve = vc.getVectorExpression(exprDesc); + + assertTrue(ve.getOutputType().equalsIgnoreCase("double")); + } + + @Test + public void testVectorizeAndOrExpression() throws HiveException { + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false); + ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10)); + + GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan(); + ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc(); + greaterExprDesc.setGenericUDF(udf); + List children1 = new ArrayList(2); + children1.add(col1Expr); + children1.add(constDesc); + greaterExprDesc.setChildExprs(children1); + + ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Float.class, "col2", "table", false); + ExprNodeConstantDesc const2Desc = new ExprNodeConstantDesc(new Float(1.0)); + + GenericUDFOPLessThan udf2 = new GenericUDFOPLessThan(); + ExprNodeGenericFuncDesc lessExprDesc = new ExprNodeGenericFuncDesc(); + lessExprDesc.setGenericUDF(udf2); + List children2 = new ArrayList(2); + children2.add(col2Expr); + children2.add(const2Desc); + lessExprDesc.setChildExprs(children2); + + GenericUDFOPAnd andUdf = new GenericUDFOPAnd(); + ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc(); + andExprDesc.setGenericUDF(andUdf); + List children3 = new ArrayList(2); + children3.add(greaterExprDesc); + children3.add(lessExprDesc); + andExprDesc.setChildExprs(children3); + + Map columnMap = new HashMap(); + columnMap.put("col1", 0); + columnMap.put("col2", 1); + + VectorizationContext vc = new VectorizationContext(columnMap, 2); + vc.setOperatorType(OperatorType.FILTER); + + VectorExpression ve = vc.getVectorExpression(andExprDesc); + + assertEquals(ve.getClass(), FilterExprAndExpr.class); + assertEquals(ve.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class); + assertEquals(ve.getChildExpressions()[1].getClass(), FilterDoubleColLessDoubleScalar.class); + + GenericUDFOPOr orUdf = new GenericUDFOPOr(); + ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc(); + orExprDesc.setGenericUDF(orUdf); + List children4 = new ArrayList(2); + children4.add(greaterExprDesc); + children4.add(lessExprDesc); + orExprDesc.setChildExprs(children4); + + + VectorExpression veOr = vc.getVectorExpression(orExprDesc); + + assertEquals(veOr.getClass(), FilterExprOrExpr.class); + assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class); + assertEquals(veOr.getChildExpressions()[1].getClass(), FilterDoubleColLessDoubleScalar.class); + } + + + @Test public void testFilterWithNegativeScalar() throws HiveException { ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false); ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(-10));