diff --git ql/src/gen/vectorization/ExpressionTemplates/StringScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringScalarCompareColumn.txt index 635b3e6..6d9af3b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringScalarCompareColumn.txt @@ -36,7 +36,7 @@ public class extends VectorExpression { private byte[] value; private int outputColumn; - public (int colNum, byte[] value, int outputColumn) { + public (byte[] value, int colNum, int outputColumn) { this.colNum = colNum; this.value = value; this.outputColumn = outputColumn; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index 8ab5395..ae7cbe6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.CollectDesc; import org.apache.hadoop.hive.ql.plan.DemuxDesc; import org.apache.hadoop.hive.ql.plan.DummyStoreDesc; @@ -124,7 +125,7 @@ public OpTuple(Class descClass, Class> opClass) { } public static Operator getVectorOperator(T conf, - VectorizationContext vContext) { + VectorizationContext vContext) throws HiveException { Class descClass = (Class) conf.getClass(); for (OpTuple o : vectorOpvec) { if (o.descClass == descClass) { @@ -136,11 +137,11 @@ public OpTuple(Class descClass, Class> opClass) { return op; } catch (Exception e) { e.printStackTrace(); - throw new RuntimeException(e); + throw new HiveException(e); } } } - throw new RuntimeException("No vector operator for descriptor class " + throw new HiveException("No vector operator for descriptor class " + descClass.getName()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionMappings.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionMappings.java new file mode 100644 index 0000000..0823756 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionMappings.java @@ -0,0 +1,457 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.Mode; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull; +import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NotCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsFalse; +import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull; +import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColGreaterDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColGreaterEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColGreaterEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColGreaterLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColLessDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColLessEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColLessEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColLessLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColModuloDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColModuloLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColMultiplyDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColMultiplyLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColNotEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColNotEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColUnaryMinus; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColGreaterDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColGreaterEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColGreaterEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColGreaterLongColumn; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColLessDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColLessEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColLessEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColLessLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColNotEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColNotEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColLessDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColLessEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColLessEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColLessLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColNotEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColNotEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColEqualStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterEqualStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColLessEqualStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColLessStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColNotEqualStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColDivideDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColGreaterDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColGreaterEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColGreaterEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColGreaterLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColLessDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColLessEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColLessEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColLessLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloDoubleColumn; 
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColMultiplyDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColMultiplyLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColNotEqualDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColNotEqualLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColSubtractLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColUnaryMinus; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringColEqualStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringColGreaterEqualStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringColGreaterStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringColLessEqualStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringColLessStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringColNotEqualStringColumn; +import org.apache.hadoop.hive.ql.udf.UDFOPDivide; +import org.apache.hadoop.hive.ql.udf.UDFOPMinus; +import org.apache.hadoop.hive.ql.udf.UDFOPMod; +import org.apache.hadoop.hive.ql.udf.UDFOPMultiply; +import org.apache.hadoop.hive.ql.udf.UDFOPNegative; +import org.apache.hadoop.hive.ql.udf.UDFOPPlus; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; + +public class VectorExpressionMappings { + + private final static Map> binaryExprColColMapping = new HashMap>(); + private final static Map> binaryExprColScalarMapping = new HashMap>(); + private final static Map> binaryExprScalarColMapping = new HashMap>(); + + private final static Map> binaryFilterExprColColMapping = new HashMap>(); + private final static Map> binaryFilterExprColScalarMapping = new HashMap>(); + private final static Map> binaryFilterExprScalarColMapping = new HashMap>(); + + private final static Map> unaryExpression = new HashMap>(); + private final static Map> unaryFilterExpression = new HashMap>(); + private final static Map> andOrNotNullExpressions = new HashMap>(); + private final static Map> andOrNotNullFilterExpressions = new HashMap>(); + + public VectorExpressionMappings() { + // Singleton style - Initialize the static mappings first time. 
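+    // The maps are keyed by the operand type names concatenated with the UDF's
+    // simple class name. For example, in FILTER mode a long < long comparison
+    // looks up "longlong" + "GenericUDFOPLessThan" and resolves to
+    // FilterLongColLessLongColumn. Only the column-column classes are registered
+    // explicitly; the column-scalar and scalar-column variants are derived from
+    // them by class-name substitution in initBinaryColumnScalar and
+    // initBinaryScalarColumn.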
+ if (binaryExprColColMapping.isEmpty()) { + initBinaryColCol(); + try { + initBinaryColumnScalar(binaryExprColColMapping, binaryExprColScalarMapping); + initBinaryScalarColumn(binaryExprColColMapping, binaryExprScalarColMapping); + + initBinaryFilterColCol(); + initBinaryColumnScalar(binaryFilterExprColColMapping, + binaryFilterExprColScalarMapping); + initBinaryScalarColumn(binaryFilterExprColColMapping, + binaryFilterExprScalarColMapping); + initUnaryExpressions(); + initNotAndOrNullExpressions(); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + } + + public Class getUnaryExpression(VectorizationContext.Mode m, Class udf, String type) { + Map> unaryMap = m.equals(VectorizationContext.Mode.FILTER) ? + unaryFilterExpression : unaryExpression; + return getBinaryExpression(unaryMap, udf, type, ""); + } + + public Class getBinaryColumnColumnExpression(VectorizationContext.Mode m, Class udf, + String type1, String type2) { + Map> binaryColColMap = m.equals(VectorizationContext.Mode.FILTER) ? + binaryFilterExprColColMapping : binaryExprColColMapping; + return getBinaryExpression(binaryColColMap, udf, type1, type2); + } + + public Class getBinaryColumnScalarExpression(VectorizationContext.Mode m, Class udf, + String type1, String type2) { + Map> binaryMap = m.equals(VectorizationContext.Mode.FILTER) ? + binaryFilterExprColScalarMapping : binaryExprColScalarMapping; + return getBinaryExpression(binaryMap, udf, type1, type2); + } + + public Class getBinaryScalarColumnExpression(VectorizationContext.Mode m, Class udf, + String type1, String type2) { + Map> binaryMap = m.equals(VectorizationContext.Mode.FILTER) ? + binaryFilterExprScalarColMapping : binaryExprScalarColMapping; + return getBinaryExpression(binaryMap, udf, type1, type2); + } + + public Class getNotAndOrNullsExpression(VectorizationContext.Mode m, Class udf) { + if (m.equals(Mode.FILTER)) { + return andOrNotNullFilterExpressions.get(udf.getSimpleName()); + } else { + return andOrNotNullExpressions.get(udf.getSimpleName()); + } + } + + public Class getBinaryExpression(Map> exprMap, Class udf, + String type1, String type2) { + Class ve = exprMap.get(type1 + type2 + udf.getSimpleName()); + return ve; + } + + private void initBinaryColCol() { + binaryExprColColMapping.put("longlong" + GenericUDFOPLessThan.class.getSimpleName(), + LongColLessLongColumn.class); + binaryExprColColMapping.put("longdouble" + GenericUDFOPLessThan.class.getSimpleName(), + LongColLessDoubleColumn.class); + binaryExprColColMapping.put("doublelong" + GenericUDFOPLessThan.class.getSimpleName(), + DoubleColLessLongColumn.class); + binaryExprColColMapping.put("doubledouble" + GenericUDFOPLessThan.class.getSimpleName(), + DoubleColLessDoubleColumn.class); + binaryExprColColMapping.put("stringstring" + GenericUDFOPLessThan.class.getSimpleName(), + StringColLessStringColumn.class); + + binaryExprColColMapping.put("longlong" + GenericUDFOPGreaterThan.class.getSimpleName(), + LongColGreaterLongColumn.class); + binaryExprColColMapping.put("longdouble" + GenericUDFOPGreaterThan.class.getSimpleName(), + LongColGreaterDoubleColumn.class); + binaryExprColColMapping.put("doublelong" + GenericUDFOPGreaterThan.class.getSimpleName(), + DoubleColGreaterLongColumn.class); + binaryExprColColMapping.put("doubledouble" + GenericUDFOPGreaterThan.class.getSimpleName(), + DoubleColGreaterDoubleColumn.class); + binaryExprColColMapping.put("stringstring" + GenericUDFOPGreaterThan.class.getSimpleName(), + StringColGreaterStringColumn.class); + + + 
binaryExprColColMapping.put("longlong" + GenericUDFOPEqual.class.getSimpleName(), + LongColEqualLongColumn.class); + binaryExprColColMapping.put("longdouble" + GenericUDFOPEqual.class.getSimpleName(), + LongColEqualDoubleColumn.class); + binaryExprColColMapping.put("doublelong" + GenericUDFOPEqual.class.getSimpleName(), + DoubleColEqualLongColumn.class); + binaryExprColColMapping.put("doubledouble" + GenericUDFOPEqual.class.getSimpleName(), + DoubleColEqualDoubleColumn.class); + binaryExprColColMapping.put("stringstring" + GenericUDFOPEqual.class.getSimpleName(), + StringColEqualStringColumn.class); + + binaryExprColColMapping.put("longlong" + GenericUDFOPEqualOrLessThan.class.getSimpleName(), + LongColLessEqualLongColumn.class); + binaryExprColColMapping.put("longdouble" + GenericUDFOPEqualOrLessThan.class.getSimpleName(), + LongColLessEqualDoubleColumn.class); + binaryExprColColMapping.put("doublelong" + GenericUDFOPEqualOrLessThan.class.getSimpleName(), + DoubleColLessEqualLongColumn.class); + binaryExprColColMapping.put("doubledouble" + GenericUDFOPEqualOrLessThan.class.getSimpleName(), + DoubleColLessEqualDoubleColumn.class); + binaryExprColColMapping.put("stringstring" + GenericUDFOPEqualOrLessThan.class.getSimpleName(), + StringColLessEqualStringColumn.class); + + binaryExprColColMapping.put("longlong" + GenericUDFOPEqualOrGreaterThan.class.getSimpleName(), + LongColGreaterEqualLongColumn.class); + binaryExprColColMapping.put( + "longdouble" + GenericUDFOPEqualOrGreaterThan.class.getSimpleName(), + LongColGreaterEqualDoubleColumn.class); + binaryExprColColMapping.put( + "doublelong" + GenericUDFOPEqualOrGreaterThan.class.getSimpleName(), + DoubleColGreaterEqualLongColumn.class); + binaryExprColColMapping.put( + "doubledouble" + GenericUDFOPEqualOrGreaterThan.class.getSimpleName(), + DoubleColGreaterEqualDoubleColumn.class); + binaryExprColColMapping.put( + "stringstring" + GenericUDFOPEqualOrGreaterThan.class.getSimpleName(), + StringColGreaterEqualStringColumn.class); + + binaryExprColColMapping.put("longlong" + GenericUDFOPNotEqual.class.getSimpleName(), + LongColNotEqualLongColumn.class); + binaryExprColColMapping.put("longdouble" + GenericUDFOPNotEqual.class.getSimpleName(), + LongColNotEqualDoubleColumn.class); + binaryExprColColMapping.put("doublelong" + GenericUDFOPNotEqual.class.getSimpleName(), + DoubleColNotEqualLongColumn.class); + binaryExprColColMapping.put("doubledouble" + GenericUDFOPNotEqual.class.getSimpleName(), + DoubleColNotEqualDoubleColumn.class); + binaryExprColColMapping.put("stringstring" + GenericUDFOPNotEqual.class.getSimpleName(), + StringColNotEqualStringColumn.class); + + binaryExprColColMapping.put("longlong" + UDFOPPlus.class.getSimpleName(), + LongColAddLongColumn.class); + binaryExprColColMapping.put("longdouble" + UDFOPPlus.class.getSimpleName(), + LongColAddDoubleColumn.class); + binaryExprColColMapping.put("doublelong" + UDFOPPlus.class.getSimpleName(), + DoubleColAddLongColumn.class); + binaryExprColColMapping.put("doubledouble" + UDFOPPlus.class.getSimpleName(), + DoubleColAddDoubleColumn.class); + + binaryExprColColMapping.put("longlong" + UDFOPMinus.class.getSimpleName(), + LongColSubtractLongColumn.class); + binaryExprColColMapping.put("longdouble" + UDFOPMinus.class.getSimpleName(), + LongColAddDoubleColumn.class); + binaryExprColColMapping.put("doublelong" + UDFOPMinus.class.getSimpleName(), + DoubleColAddLongColumn.class); + binaryExprColColMapping.put("doubledouble" + UDFOPMinus.class.getSimpleName(), + DoubleColAddDoubleColumn.class); + 
+ binaryExprColColMapping.put("longlong" + UDFOPMultiply.class.getSimpleName(), + LongColMultiplyLongColumn.class); + binaryExprColColMapping.put("longdouble" + UDFOPMultiply.class.getSimpleName(), + LongColMultiplyDoubleColumn.class); + binaryExprColColMapping.put("doublelong" + UDFOPMultiply.class.getSimpleName(), + DoubleColMultiplyLongColumn.class); + binaryExprColColMapping.put("doubledouble" + UDFOPMultiply.class.getSimpleName(), + DoubleColMultiplyDoubleColumn.class); + + binaryExprColColMapping.put("longlong" + UDFOPDivide.class.getSimpleName(), + LongColDivideLongColumn.class); + binaryExprColColMapping.put("longdouble" + UDFOPDivide.class.getSimpleName(), + LongColDivideDoubleColumn.class); + binaryExprColColMapping.put("doublelong" + UDFOPDivide.class.getSimpleName(), + DoubleColDivideLongColumn.class); + binaryExprColColMapping.put("doubledouble" + UDFOPDivide.class.getSimpleName(), + DoubleColDivideDoubleColumn.class); + + binaryExprColColMapping.put("longlong" + UDFOPMod.class.getSimpleName(), + LongColModuloLongColumn.class); + binaryExprColColMapping.put("longdouble" + UDFOPMod.class.getSimpleName(), + LongColModuloDoubleColumn.class); + binaryExprColColMapping.put("doublelong" + UDFOPMod.class.getSimpleName(), + DoubleColModuloLongColumn.class); + binaryExprColColMapping.put("doubledouble" + UDFOPMod.class.getSimpleName(), + DoubleColModuloDoubleColumn.class); + } + + private void initBinaryFilterColCol() { + binaryFilterExprColColMapping.put("longlong" + GenericUDFOPLessThan.class.getSimpleName(), + FilterLongColLessLongColumn.class); + binaryFilterExprColColMapping.put("longdouble" + GenericUDFOPLessThan.class.getSimpleName(), + FilterLongColLessDoubleColumn.class); + binaryFilterExprColColMapping.put("doublelong" + GenericUDFOPLessThan.class.getSimpleName(), + FilterDoubleColLessLongColumn.class); + binaryFilterExprColColMapping.put("doubledouble" + GenericUDFOPLessThan.class.getSimpleName(), + FilterDoubleColLessDoubleColumn.class); + binaryFilterExprColColMapping.put("stringstring" + GenericUDFOPLessThan.class.getSimpleName(), + FilterStringColLessStringColumn.class); + + binaryFilterExprColColMapping.put("longlong" + GenericUDFOPGreaterThan.class.getSimpleName(), + FilterLongColGreaterLongColumn.class); + binaryFilterExprColColMapping.put("longdouble" + GenericUDFOPGreaterThan.class.getSimpleName(), + FilterLongColGreaterDoubleColumn.class); + binaryFilterExprColColMapping.put("doublelong" + GenericUDFOPGreaterThan.class.getSimpleName(), + FilterDoubleColGreaterLongColumn.class); + binaryFilterExprColColMapping.put( + "doubledouble" + GenericUDFOPGreaterThan.class.getSimpleName(), + FilterDoubleColGreaterDoubleColumn.class); + binaryFilterExprColColMapping.put( + "stringstring" + GenericUDFOPGreaterThan.class.getSimpleName(), + FilterStringColGreaterStringColumn.class); + + binaryFilterExprColColMapping.put("longlong" + GenericUDFOPEqual.class.getSimpleName(), + FilterLongColEqualLongColumn.class); + binaryFilterExprColColMapping.put("longdouble" + GenericUDFOPEqual.class.getSimpleName(), + FilterLongColEqualDoubleColumn.class); + binaryFilterExprColColMapping.put("doublelong" + GenericUDFOPEqual.class.getSimpleName(), + FilterDoubleColEqualLongColumn.class); + binaryFilterExprColColMapping.put("doubledouble" + GenericUDFOPEqual.class.getSimpleName(), + FilterDoubleColEqualDoubleColumn.class); + binaryFilterExprColColMapping.put("stringstring" + GenericUDFOPEqual.class.getSimpleName(), + FilterStringColEqualStringColumn.class); + + binaryFilterExprColColMapping.put( 
+ "longlong" + GenericUDFOPEqualOrLessThan.class.getSimpleName(), + FilterLongColLessEqualLongColumn.class); + binaryFilterExprColColMapping.put( + "longdouble" + GenericUDFOPEqualOrLessThan.class.getSimpleName(), + FilterLongColLessEqualDoubleColumn.class); + binaryFilterExprColColMapping.put( + "doublelong" + GenericUDFOPEqualOrLessThan.class.getSimpleName(), + FilterDoubleColLessEqualLongColumn.class); + binaryFilterExprColColMapping.put( + "doubledouble" + GenericUDFOPEqualOrLessThan.class.getSimpleName(), + FilterDoubleColLessEqualDoubleColumn.class); + binaryFilterExprColColMapping.put( + "stringstring" + GenericUDFOPEqualOrLessThan.class.getSimpleName(), + FilterStringColLessEqualStringColumn.class); + + binaryFilterExprColColMapping.put( + "longlong" + GenericUDFOPEqualOrGreaterThan.class.getSimpleName(), + FilterLongColGreaterEqualLongColumn.class); + binaryFilterExprColColMapping.put( + "longdouble" + GenericUDFOPEqualOrGreaterThan.class.getSimpleName(), + FilterLongColGreaterEqualDoubleColumn.class); + binaryFilterExprColColMapping.put( + "doublelong" + GenericUDFOPEqualOrGreaterThan.class.getSimpleName(), + FilterDoubleColGreaterEqualLongColumn.class); + binaryFilterExprColColMapping.put( + "doubledouble" + GenericUDFOPEqualOrGreaterThan.class.getSimpleName(), + FilterDoubleColGreaterEqualDoubleColumn.class); + binaryFilterExprColColMapping.put( + "stringstring" + GenericUDFOPEqualOrGreaterThan.class.getSimpleName(), + FilterStringColGreaterEqualStringColumn.class); + + binaryFilterExprColColMapping.put("longlong" + GenericUDFOPNotEqual.class.getSimpleName(), + FilterLongColNotEqualLongColumn.class); + binaryFilterExprColColMapping.put("longdouble" + GenericUDFOPNotEqual.class.getSimpleName(), + FilterLongColNotEqualDoubleColumn.class); + binaryFilterExprColColMapping.put("doublelong" + GenericUDFOPNotEqual.class.getSimpleName(), + FilterDoubleColNotEqualLongColumn.class); + binaryFilterExprColColMapping.put("doubledouble" + GenericUDFOPNotEqual.class.getSimpleName(), + FilterDoubleColNotEqualDoubleColumn.class); + binaryFilterExprColColMapping.put("stringstring" + GenericUDFOPNotEqual.class.getSimpleName(), + FilterStringColNotEqualStringColumn.class); + } + + private void initBinaryColumnScalar(Map> src, Map> dst) + throws ClassNotFoundException { + for (String key : src.keySet()) { + Class value = src.get(key); + String colScalarClassName = value.getPackage().getName() + "." + + value.getSimpleName().replace("Column", "Scalar"); + Class colScalarClass = Class.forName(colScalarClassName); + dst.put(key, colScalarClass); + } + } + + private void initBinaryScalarColumn(Map> src, Map> dst) + throws ClassNotFoundException { + for (String key : src.keySet()) { + Class value = src.get(key); + String scalarColumnClassName = value.getPackage().getName() + "." 
+ + value.getSimpleName().replace("Col", "Scalar").replace("Scalarumn", "Column"); + Class scalarColumnClass = Class.forName(scalarColumnClassName); + dst.put(key, scalarColumnClass); + } + } + + public int getSize() { + return binaryExprColColMapping.size() + binaryExprColScalarMapping.size() + + binaryExprScalarColMapping.size() + binaryFilterExprColColMapping.size() + + binaryFilterExprColScalarMapping.size() + binaryFilterExprScalarColMapping.size(); + } + + private void initUnaryExpressions() { + unaryExpression.put("long" + UDFOPNegative.class.getSimpleName(), LongColUnaryMinus.class); + unaryExpression.put("double" + UDFOPNegative.class.getSimpleName(), + DoubleColUnaryMinus.class); + } + + private void initNotAndOrNullExpressions() { + andOrNotNullExpressions.put(GenericUDFOPNull.class.getSimpleName(), IsNull.class); + andOrNotNullExpressions.put(GenericUDFOPNotNull.class.getSimpleName(), IsNotNull.class); + andOrNotNullExpressions.put(GenericUDFOPNot.class.getSimpleName(), NotCol.class); + andOrNotNullExpressions.put(GenericUDFOPAnd.class.getSimpleName(), ColAndCol.class); + andOrNotNullExpressions.put(GenericUDFOPOr.class.getSimpleName(), ColOrCol.class); + andOrNotNullFilterExpressions.put(GenericUDFOPNull.class.getSimpleName(), + SelectColumnIsNull.class); + andOrNotNullFilterExpressions.put(GenericUDFOPNotNull.class.getSimpleName(), + SelectColumnIsNotNull.class); + andOrNotNullFilterExpressions.put(GenericUDFOPNot.class.getSimpleName(), + SelectColumnIsFalse.class); + andOrNotNullFilterExpressions.put(GenericUDFOPAnd.class.getSimpleName(), + FilterExprAndExpr.class); + andOrNotNullFilterExpressions.put(GenericUDFOPOr.class.getSimpleName(), FilterExprOrExpr.class); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 616a28a..a8b2f96 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -36,12 +36,8 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; -import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull; -import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColScalar; @@ -87,12 +83,7 @@ import org.apache.hadoop.hive.ql.udf.UDFLike; import org.apache.hadoop.hive.ql.udf.UDFMinute; import org.apache.hadoop.hive.ql.udf.UDFMonth; -import org.apache.hadoop.hive.ql.udf.UDFOPDivide; -import org.apache.hadoop.hive.ql.udf.UDFOPMinus; -import org.apache.hadoop.hive.ql.udf.UDFOPMod; -import org.apache.hadoop.hive.ql.udf.UDFOPMultiply; import org.apache.hadoop.hive.ql.udf.UDFOPNegative; -import org.apache.hadoop.hive.ql.udf.UDFOPPlus; import org.apache.hadoop.hive.ql.udf.UDFOPPositive; import 
org.apache.hadoop.hive.ql.udf.UDFRTrim; import org.apache.hadoop.hive.ql.udf.UDFSecond; @@ -105,13 +96,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; @@ -128,14 +113,22 @@ * */ public class VectorizationContext { + private static final Log LOG = LogFactory.getLog( VectorizationContext.class.getName()); + public enum Mode { + FILTER, + PROJECTION + } + + VectorExpressionMappings vMap; + //columnName to column position map private final Map columnMap; private final int firstOutputColumnIndex; + private Mode mode = Mode.PROJECTION; - private OperatorType opType; //Map column number to type private final OutputColumnManager ocm; @@ -144,12 +137,17 @@ public VectorizationContext(Map columnMap, this.columnMap = columnMap; this.ocm = new OutputColumnManager(initialOutputCol); this.firstOutputColumnIndex = initialOutputCol; + vMap = new VectorExpressionMappings(); } private int getInputColumnIndex(String name) { return columnMap.get(name); } + private int getInputColumnIndex(ExprNodeColumnDesc colExpr) { + return columnMap.get(colExpr.getColumn()); + } + private class OutputColumnManager { private final int initialOutputCol; private int outputColCount = 0; @@ -206,21 +204,28 @@ void freeOutputColumn(int index) { } public void setOperatorType(OperatorType opType) { - this.opType = opType; + switch (opType) { + case FILTER: + mode = Mode.FILTER; + break; + case SELECT: + case GROUPBY: + case REDUCESINK: + mode = Mode.PROJECTION; + break; + } } private VectorExpression getVectorExpression(ExprNodeColumnDesc exprDesc) { int columnNum = getInputColumnIndex(exprDesc.getColumn()); VectorExpression expr = null; - switch (opType) { + switch (mode) { case FILTER: //Important: It will come here only if the column is being used as a boolean expr = new SelectColumnIsTrue(columnNum); break; - case SELECT: - case GROUPBY: - case REDUCESINK: + case PROJECTION: expr = new IdentityExpression(columnNum, exprDesc.getTypeString()); break; } @@ -255,7 +260,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) throws HiveEx if (isCustomUDF(expr)) { ve = getCustomUDFExpression(expr); } else { - ve = getVectorExpression(expr.getGenericUDF(), + ve = getGenericUdfVectorExpression(expr.getGenericUDF(), expr.getChildExprs()); } } else if (exprDesc instanceof ExprNodeConstantDesc) { @@ -306,8 +311,6 @@ private ExprNodeDesc foldConstantsForUnaryExpression(ExprNodeDesc exprDesc) thro Class cl = ((GenericUDFBridge) gudf).getUdfClass(); - ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) exprDesc.getChildren().get(0); - if (cl.equals(UDFOPNegative.class) || cl.equals(UDFOPPositive.class)) { ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(exprDesc); ObjectInspector output = 
evaluator.initialize(null); @@ -334,7 +337,7 @@ private VectorExpression getConstantVectorExpression(ExprNodeConstantDesc exprDe } else if (type.equalsIgnoreCase("string")) { return new ConstantVectorExpression(outCol, ((String) exprDesc.getValue()).getBytes()); } else if (type.equalsIgnoreCase("boolean")) { - if (this.opType == OperatorType.FILTER) { + if (this.mode == Mode.FILTER) { if (((Boolean) exprDesc.getValue()).booleanValue()) { return new FilterConstantBooleanVectorExpression(1); } else { @@ -352,7 +355,7 @@ private VectorExpression getConstantVectorExpression(ExprNodeConstantDesc exprDe } } - private VectorExpression getUnaryMinusExpression(List childExprList) + private VectorExpression getUnaryPlusExpression(List childExprList) throws HiveException { ExprNodeDesc childExpr = childExprList.get(0); int inputCol; @@ -369,71 +372,162 @@ private VectorExpression getUnaryMinusExpression(List childExprLis } else { throw new HiveException("Expression not supported: "+childExpr); } - String outputColumnType = getNormalizedTypeName(colType); - int outputCol = ocm.allocateOutputColumn(outputColumnType); - String className = "org.apache.hadoop.hive.ql.exec.vector.expressions.gen." - + outputColumnType + "ColUnaryMinus"; - VectorExpression expr; - try { - expr = (VectorExpression) getConstructor(className).newInstance(inputCol, outputCol); - } catch (Exception ex) { - throw new HiveException(ex); - } + VectorExpression expr = new IdentityExpression(inputCol, colType); if (v1 != null) { expr.setChildExpressions(new VectorExpression [] {v1}); - ocm.freeOutputColumn(v1.getOutputColumn()); } return expr; } - private VectorExpression getUnaryPlusExpression(List childExprList) + private VectorExpression getBinaryExpression(Class udf, List childExpr) throws HiveException { - ExprNodeDesc childExpr = childExprList.get(0); - int inputCol; - String colType; + ExprNodeDesc leftExpr = childExpr.get(0); + ExprNodeDesc rightExpr = childExpr.get(1); + String type1 = getNormalizedTypeName(leftExpr.getTypeString()).toLowerCase(); + String type2 = getNormalizedTypeName(rightExpr.getTypeString()).toLowerCase(); + int column1 = -1; + int column2 = -1; VectorExpression v1 = null; - if (childExpr instanceof ExprNodeGenericFuncDesc) { - v1 = getVectorExpression(childExpr); - inputCol = v1.getOutputColumn(); - colType = v1.getOutputType(); - } else if (childExpr instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr; - inputCol = getInputColumnIndex(colDesc.getColumn()); - colType = colDesc.getTypeString(); + VectorExpression v2 = null; + List children = new ArrayList(); + if (leftExpr instanceof ExprNodeGenericFuncDesc) { + v1 = getVectorExpression(leftExpr); + children.add(v1); + column1 = v1.getOutputColumn(); + } else if (leftExpr instanceof ExprNodeColumnDesc) { + column1 = getInputColumnIndex((ExprNodeColumnDesc) leftExpr); + } + + if (rightExpr instanceof ExprNodeGenericFuncDesc) { + v2 = getVectorExpression(rightExpr); + children.add(v2); + column2 = v2.getOutputColumn(); + } else if (rightExpr instanceof ExprNodeColumnDesc) { + column2 = this.getInputColumnIndex((ExprNodeColumnDesc) rightExpr); + } + + VectorExpression vectorExpression = null; + try { + if (leftExpr instanceof ExprNodeConstantDesc) { + Object scalarVal = getScalarValue((ExprNodeConstantDesc) leftExpr); + Class vclass = this.vMap.getBinaryScalarColumnExpression(mode, udf, type1, type2); + if (vclass == null) { + return null; + } + vectorExpression = instantiateExpression(vclass, scalarVal, column2); 
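+        // Scalar-column expressions receive the scalar value first and the column
+        // index second, which is the argument order produced by the reordered
+        // StringScalarCompareColumn.txt constructor (value, colNum, outputColumn).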
+ } else if (rightExpr instanceof ExprNodeConstantDesc) { + Object scalarVal = getScalarValue((ExprNodeConstantDesc) rightExpr); + Class vclass = this.vMap.getBinaryColumnScalarExpression(mode, udf, type1, type2); + if (vclass == null) { + return null; + } + vectorExpression = instantiateExpression(vclass, column1, scalarVal); + } else { + Class vclass = this.vMap.getBinaryColumnColumnExpression(mode, udf, type1, type2); + if (vclass == null) { + return null; + } + vectorExpression = instantiateExpression(vclass, column1, column2); + } + if ((vectorExpression != null) && !children.isEmpty()){ + vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0])); + } + } catch (Exception ex) { + throw new HiveException(ex); + } finally { + if (v1 != null) { + ocm.freeOutputColumn(v1.getOutputColumn()); + } + if (v2 != null) { + ocm.freeOutputColumn(v2.getOutputColumn()); + } + } + return vectorExpression; + } + + private VectorExpression getUnaryExpression(Class udf, List childExpr) + throws HiveException { + ExprNodeDesc cExpr = childExpr.get(0); + String type = getNormalizedTypeName(cExpr.getTypeString()).toLowerCase(); + int column = -1; + List children = new ArrayList(); + VectorExpression v1 = null; + if (cExpr instanceof ExprNodeGenericFuncDesc) { + v1 = getVectorExpression(cExpr); + children.add(v1); + column = v1.getOutputColumn(); + } else if (cExpr instanceof ExprNodeColumnDesc) { + column = getInputColumnIndex((ExprNodeColumnDesc) cExpr); } else { - throw new HiveException("Expression not supported: "+childExpr); + throw new HiveException("Unary udfs over constants are not supported, in vector mode"); } - VectorExpression expr = new IdentityExpression(inputCol, colType); - if (v1 != null) { - expr.setChildExpressions(new VectorExpression [] {v1}); + + VectorExpression vectorExpression = null; + try { + Class vclass = this.vMap.getUnaryExpression(mode, udf, type); + if (vclass == null) { + return null; + } + vectorExpression = instantiateExpression(vclass, column); + if ((vectorExpression != null) && !children.isEmpty()){ + vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0])); + } + } catch (Exception ex) { + throw new HiveException(ex); + } finally { + if (v1 != null) { + ocm.freeOutputColumn(column); + } } - return expr; + return vectorExpression; + } + + private VectorExpression instantiateExpression(Class vclass, Object...args) + throws HiveException { + Constructor ctor = getConstructor(vclass); + int numParams = ctor.getParameterTypes().length; + try { + if (numParams == 0) { + return (VectorExpression) ctor.newInstance(); + } else if (numParams == args.length) { + return (VectorExpression) ctor.newInstance(args); + } else if (numParams == args.length + 1) { + // Additional argument is needed, which is the outputcolumn. 
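+        // The output type is read from a throw-away instance built with the default
+        // constructor, a column of that type is allocated from the
+        // OutputColumnManager, and its index is appended as the last constructor
+        // argument. For example, LongColAddLongColumn(colNum1, colNum2, outputColumn)
+        // is built here from args = {colNum1, colNum2} plus the allocated column.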
+ String outType = ((VectorExpression) vclass.newInstance()).getOutputType(); + int outputCol = ocm.allocateOutputColumn(outType); + Object [] newArgs = Arrays.copyOf(args, numParams); + newArgs[numParams-1] = outputCol; + return (VectorExpression) ctor.newInstance(newArgs); + } + } catch (Exception ex) { + throw new HiveException("Could not instantiate " + vclass.getSimpleName(), ex); + } + return null; } - private VectorExpression getVectorExpression(GenericUDF udf, + private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, List childExpr) throws HiveException { - if (udf instanceof GenericUDFOPLessThan) { - return getVectorBinaryComparisonFilterExpression("Less", childExpr); - } else if (udf instanceof GenericUDFOPEqualOrLessThan) { - return getVectorBinaryComparisonFilterExpression("LessEqual", childExpr); - } else if (udf instanceof GenericUDFOPEqual) { - return getVectorBinaryComparisonFilterExpression("Equal", childExpr); - } else if (udf instanceof GenericUDFOPGreaterThan) { - return getVectorBinaryComparisonFilterExpression("Greater", childExpr); - } else if (udf instanceof GenericUDFOPEqualOrGreaterThan) { - return getVectorBinaryComparisonFilterExpression("GreaterEqual", childExpr); - } else if (udf instanceof GenericUDFOPNotEqual) { - return getVectorBinaryComparisonFilterExpression("NotEqual", childExpr); - } else if (udf instanceof GenericUDFOPNotNull) { - return getVectorExpression((GenericUDFOPNotNull) udf, childExpr); - } else if (udf instanceof GenericUDFOPNull) { - return getVectorExpression((GenericUDFOPNull) udf, childExpr); - } else if (udf instanceof GenericUDFOPAnd) { - return getVectorExpression((GenericUDFOPAnd) udf, childExpr); - } else if (udf instanceof GenericUDFOPNot) { - return getVectorExpression((GenericUDFOPNot) udf, childExpr); - } else if (udf instanceof GenericUDFOPOr) { - return getVectorExpression((GenericUDFOPOr) udf, childExpr); + VectorExpression ve = null; + Class udfClass = udf.getClass(); + if (udf instanceof GenericUDFBridge) { + udfClass = ((GenericUDFBridge) udf).getUdfClass(); + } + + if (childExpr.size() == 2) { + ve = getBinaryExpression(udfClass, childExpr); + } else if (childExpr.size() == 1) { + ve = getUnaryExpression(udfClass, childExpr); + } + + if (ve != null) { + return ve; + } + + if ( (udf instanceof GenericUDFOPNotNull) || (udf instanceof GenericUDFOPNull) + || (udf instanceof GenericUDFOPNot)) { + return getNotAndNullsVectorExpression(udf.getClass(), childExpr); + } else if ((udf instanceof GenericUDFOPAnd) || (udf instanceof GenericUDFOPOr)) { + return getAndOrVectorExpression(udf.getClass(), childExpr); } else if (udf instanceof GenericUDFBridge) { return getVectorExpression((GenericUDFBridge) udf, childExpr); } else if(udf instanceof GenericUDFToUnixTimeStamp) { @@ -473,20 +567,7 @@ private VectorExpression getVectorExpression(GenericUDFToUnixTimeStamp udf, private VectorExpression getVectorExpression(GenericUDFBridge udf, List childExpr) throws HiveException { Class cl = udf.getUdfClass(); - // (UDFBaseNumericOp.class.isAssignableFrom(cl)) == true - if (cl.equals(UDFOPPlus.class)) { - return getBinaryArithmeticExpression("Add", childExpr); - } else if (cl.equals(UDFOPMinus.class)) { - return getBinaryArithmeticExpression("Subtract", childExpr); - } else if (cl.equals(UDFOPMultiply.class)) { - return getBinaryArithmeticExpression("Multiply", childExpr); - } else if (cl.equals(UDFOPDivide.class)) { - return getBinaryArithmeticExpression("Divide", childExpr); - } else if (cl.equals(UDFOPMod.class)) { - return 
getBinaryArithmeticExpression("Modulo", childExpr); - } else if (cl.equals(UDFOPNegative.class)) { - return getUnaryMinusExpression(childExpr); - } else if (cl.equals(UDFOPPositive.class)) { + if (cl.equals(UDFOPPositive.class)) { return getUnaryPlusExpression(childExpr); } else if (cl.equals(UDFYear.class) || cl.equals(UDFMonth.class) || @@ -509,7 +590,6 @@ private VectorExpression getVectorExpression(GenericUDFBridge udf, } else if (cl.equals(UDFTrim.class)) { return getUnaryStringExpression("StringTrim", "String", childExpr); } - throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported"); } @@ -857,260 +937,90 @@ private VectorExpression getTimestampFieldExpression(String udf, throw new HiveException("Udf: "+udf+", is not supported for " + colType); } - private VectorExpression getBinaryArithmeticExpression(String method, - List childExpr) throws HiveException { - ExprNodeDesc leftExpr = childExpr.get(0); - ExprNodeDesc rightExpr = childExpr.get(1); - - // TODO: Remove this when constant folding is fixed in the optimizer. - leftExpr = foldConstantsForUnaryExpression(leftExpr); - rightExpr = foldConstantsForUnaryExpression(rightExpr); - - VectorExpression v1 = null; - VectorExpression v2 = null; - - VectorExpression expr = null; - if ( (leftExpr instanceof ExprNodeColumnDesc) && - (rightExpr instanceof ExprNodeConstantDesc) ) { - ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr; - ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr; - int inputCol = getInputColumnIndex(leftColDesc.getColumn()); - String colType = leftColDesc.getTypeString(); - String scalarType = constDesc.getTypeString(); - String className = getBinaryColumnScalarExpressionClassName(colType, - scalarType, method); - int outputCol = ocm.allocateOutputColumn(getOutputColType(colType, - scalarType, method)); - try { - expr = (VectorExpression) getConstructor(className).newInstance(inputCol, - getScalarValue(constDesc), outputCol); - } catch (Exception ex) { - throw new HiveException(ex); - } - } else if ( (leftExpr instanceof ExprNodeConstantDesc) && - (rightExpr instanceof ExprNodeColumnDesc) ) { - ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr; - ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr; - int inputCol = getInputColumnIndex(rightColDesc.getColumn()); - String colType = rightColDesc.getTypeString(); - String scalarType = constDesc.getTypeString(); - String className = getBinaryScalarColumnExpressionClassName(colType, - scalarType, method); - String outputColType = getOutputColType(colType, scalarType, method); - int outputCol = ocm.allocateOutputColumn(outputColType); - try { - expr = (VectorExpression) getConstructor(className).newInstance(getScalarValue(constDesc), - inputCol, outputCol); - } catch (Exception ex) { - throw new HiveException("Could not instantiate: "+className, ex); - } - } else if ( (rightExpr instanceof ExprNodeColumnDesc) && - (leftExpr instanceof ExprNodeColumnDesc) ) { - ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr; - ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr; - int inputCol1 = getInputColumnIndex(leftColDesc.getColumn()); - int inputCol2 = getInputColumnIndex(rightColDesc.getColumn()); - String colType1 = leftColDesc.getTypeString(); - String colType2 = rightColDesc.getTypeString(); - String outputColType = getOutputColType(colType1, colType2, method); - String className = getBinaryColumnColumnExpressionClassName(colType1, - colType2, method); - int outputCol = 
ocm.allocateOutputColumn(outputColType); - try { - expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2, - outputCol); - } catch (Exception ex) { - throw new HiveException(ex); - } - } else if ((leftExpr instanceof ExprNodeGenericFuncDesc) - && (rightExpr instanceof ExprNodeColumnDesc)) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr; - v1 = getVectorExpression(leftExpr); - int inputCol1 = v1.getOutputColumn(); - int inputCol2 = getInputColumnIndex(colDesc.getColumn()); - String colType1 = v1.getOutputType(); - String colType2 = colDesc.getTypeString(); - String outputColType = getOutputColType(colType1, colType2, method); - String className = getBinaryColumnColumnExpressionClassName(colType1, - colType2, method); - int outputCol = ocm.allocateOutputColumn(outputColType); - try { - expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2, - outputCol); - } catch (Exception ex) { - throw new HiveException((ex)); - } - expr.setChildExpressions(new VectorExpression [] {v1}); - } else if ((leftExpr instanceof ExprNodeGenericFuncDesc) - && (rightExpr instanceof ExprNodeConstantDesc)) { - ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr; - v1 = getVectorExpression(leftExpr); - int inputCol1 = v1.getOutputColumn(); - String colType1 = v1.getOutputType(); - String scalarType = constDesc.getTypeString(); - String outputColType = getOutputColType(colType1, scalarType, method); - int outputCol = ocm.allocateOutputColumn(outputColType); - String className = getBinaryColumnScalarExpressionClassName(colType1, - scalarType, method); - try { - expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, - getScalarValue(constDesc), outputCol); - } catch (Exception ex) { - throw new HiveException((ex)); - } - expr.setChildExpressions(new VectorExpression [] {v1}); - } else if ((leftExpr instanceof ExprNodeColumnDesc) - && (rightExpr instanceof ExprNodeGenericFuncDesc)) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr; - v2 = getVectorExpression(rightExpr); - int inputCol1 = getInputColumnIndex(colDesc.getColumn()); - int inputCol2 = v2.getOutputColumn(); - String colType1 = colDesc.getTypeString(); - String colType2 = v2.getOutputType(); - String outputColType = getOutputColType(colType1, colType2, method); - int outputCol = ocm.allocateOutputColumn(outputColType); - String className = getBinaryColumnColumnExpressionClassName(colType1, - colType2, method); - try { - expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2, - outputCol); - } catch (Exception ex) { - throw new HiveException(ex); - } - expr.setChildExpressions(new VectorExpression [] {v2}); - } else if ((leftExpr instanceof ExprNodeConstantDesc) - && (rightExpr instanceof ExprNodeGenericFuncDesc)) { - ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr; - v2 = getVectorExpression(rightExpr); - int inputCol2 = v2.getOutputColumn(); - String colType2 = v2.getOutputType(); - String scalarType = constDesc.getTypeString(); - String outputColType = getOutputColType(colType2, scalarType, method); - int outputCol = ocm.allocateOutputColumn(outputColType); - String className = getBinaryScalarColumnExpressionClassName(colType2, - scalarType, method); - try { - expr = (VectorExpression) getConstructor(className).newInstance(getScalarValue(constDesc), - inputCol2, outputCol); - } catch (Exception ex) { - throw new HiveException(ex); - } - expr.setChildExpressions(new VectorExpression [] {v2}); - 
} else if ((leftExpr instanceof ExprNodeGenericFuncDesc) - && (rightExpr instanceof ExprNodeGenericFuncDesc)) { - //For arithmetic expression, the child expressions must be materializing - //columns - v1 = getVectorExpression(leftExpr); - v2 = getVectorExpression(rightExpr); - int inputCol1 = v1.getOutputColumn(); - int inputCol2 = v2.getOutputColumn(); - String colType1 = v1.getOutputType(); - String colType2 = v2.getOutputType(); - String outputColType = getOutputColType(colType1, colType2, method); - int outputCol = ocm.allocateOutputColumn(outputColType); - String className = getBinaryColumnColumnExpressionClassName(colType1, - colType2, method); - try { - expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2, - outputCol); - } catch (Exception ex) { - throw new HiveException(ex); - } - expr.setChildExpressions(new VectorExpression [] {v1, v2}); - } - //Reclaim output columns of children to be re-used later - if (v1 != null) { - ocm.freeOutputColumn(v1.getOutputColumn()); - } - if (v2 != null) { - ocm.freeOutputColumn(v2.getOutputColumn()); - } - return expr; - } - - private VectorExpression getVectorExpression(GenericUDFOPOr udf, + private VectorExpression getNotAndNullsVectorExpression(Class udfClass, List childExpr) throws HiveException { - ExprNodeDesc leftExpr = childExpr.get(0); - ExprNodeDesc rightExpr = childExpr.get(1); - - VectorExpression ve1; - VectorExpression ve2; - if (leftExpr instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr; - int inputCol = getInputColumnIndex(colDesc.getColumn()); - ve1 = new SelectColumnIsTrue(inputCol); + Mode currentMode = mode; + ExprNodeDesc child = childExpr.get(0); + VectorExpression ve = null; + int column = -1; + if (child instanceof ExprNodeGenericFuncDesc) { + // Child tree of NOT is always evaluated as a projection + mode = Mode.PROJECTION; + ve = getVectorExpression(child); + column = ve.getOutputColumn(); + // Set the mode back + mode = currentMode; + } else if (child instanceof ExprNodeColumnDesc) { + column = getInputColumnIndex((ExprNodeColumnDesc) child); } else { - ve1 = getVectorExpression(leftExpr); + throw new HiveException("UDF applied on a constant is not supported"); } - if (rightExpr instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr; - int inputCol = getInputColumnIndex(colDesc.getColumn()); - ve2 = new SelectColumnIsTrue(inputCol); - } else { - ve2 = getVectorExpression(rightExpr); + Class vClass = vMap.getNotAndOrNullsExpression(mode, udfClass); + if (vClass == null) { + return null; } - - return new FilterExprOrExpr(ve1,ve2); - } - - private VectorExpression getVectorExpression(GenericUDFOPNot udf, - List childExpr) throws HiveException { - throw new HiveException("Not is not supported"); + VectorExpression vectorExpr = instantiateExpression(vClass, column); + if (ve != null) { + vectorExpr.setChildExpressions(new VectorExpression[] {ve}); + ocm.freeOutputColumn(column); + } + return vectorExpr; } - private VectorExpression getVectorExpression(GenericUDFOPAnd udf, + private VectorExpression getAndOrVectorExpression(Class udfClass, List childExpr) throws HiveException { ExprNodeDesc leftExpr = childExpr.get(0); ExprNodeDesc rightExpr = childExpr.get(1); - VectorExpression ve1; - VectorExpression ve2; + VectorExpression ve1 = null; + VectorExpression ve2 = null; + int column1 = -1; + int column2 = -1; + List children = new ArrayList(); if (leftExpr instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc 
colDesc = (ExprNodeColumnDesc) leftExpr; - int inputCol = getInputColumnIndex(colDesc.getColumn()); - ve1 = new SelectColumnIsTrue(inputCol); - } else { + column1 = getInputColumnIndex(colDesc.getColumn()); + if (mode.equals(Mode.FILTER)) { + ve1 = new SelectColumnIsTrue(column1); + children.add(ve1); + } + } else if (leftExpr instanceof ExprNodeGenericFuncDesc){ ve1 = getVectorExpression(leftExpr); + column1 = ve1.getOutputColumn(); + children.add(ve1); + } else { + throw new HiveException("Constant not supported in this context"); } if (rightExpr instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr; - int inputCol = getInputColumnIndex(colDesc.getColumn()); - ve2 = new SelectColumnIsTrue(inputCol); - } else { + column2 = getInputColumnIndex(colDesc.getColumn()); + if (mode.equals(Mode.FILTER)) { + ve2 = new SelectColumnIsTrue(column2); + children.add(ve2); + } + } else if (rightExpr instanceof ExprNodeGenericFuncDesc) { ve2 = getVectorExpression(rightExpr); - } - - return new FilterExprAndExpr(ve1,ve2); - } - - private VectorExpression getVectorExpression(GenericUDFOPNull udf, - List childExpr) throws HiveException { - ExprNodeDesc expr = childExpr.get(0); - VectorExpression ve = null; - if (expr instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) expr; - int inputCol = getInputColumnIndex(colDesc.getColumn()); - ve = new SelectColumnIsNull(inputCol); + column2 = ve2.getOutputColumn(); + children.add(ve2); } else { - throw new HiveException("Not supported"); + throw new HiveException("Constant not supported in this context"); } - return ve; - } - private VectorExpression getVectorExpression(GenericUDFOPNotNull udf, - List childExpr) throws HiveException { - ExprNodeDesc expr = childExpr.get(0); - if (expr instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) expr; - int inputCol = getInputColumnIndex(colDesc.getColumn()); - VectorExpression ve = new SelectColumnIsNotNull(inputCol); - return ve; - } else { - throw new HiveException("Not supported"); + Class vClass = vMap.getNotAndOrNullsExpression(mode, udfClass); + VectorExpression vectorExpr = instantiateExpression(vClass, column1, column2); + if (!children.isEmpty()) { + vectorExpr.setChildExpressions(children.toArray(new VectorExpression[0])); + } + if (ve1 != null) { + ocm.freeOutputColumn(column1); } + if (ve2 != null) { + ocm.freeOutputColumn(column2); + } + return vectorExpr; } private Object getScalarValue(ExprNodeConstantDesc constDesc) @@ -1133,161 +1043,22 @@ private Object getScalarValue(ExprNodeConstantDesc constDesc) } } - private VectorExpression getVectorBinaryComparisonFilterExpression(String - opName, List childExpr) throws HiveException { - - ExprNodeDesc leftExpr = childExpr.get(0); - ExprNodeDesc rightExpr = childExpr.get(1); - - // TODO: Remove this when constant folding is fixed in the optimizer.
-    leftExpr = foldConstantsForUnaryExpression(leftExpr);
-    rightExpr = foldConstantsForUnaryExpression(rightExpr);
-
-    VectorExpression expr = null;
-    VectorExpression v1 = null;
-    VectorExpression v2 = null;
-    if ( (leftExpr instanceof ExprNodeColumnDesc) &&
-        (rightExpr instanceof ExprNodeConstantDesc) ) {
-      ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
-      ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
-      int inputCol = getInputColumnIndex(leftColDesc.getColumn());
-      String colType = leftColDesc.getTypeString();
-      String scalarType = constDesc.getTypeString();
-      String className = getFilterColumnScalarExpressionClassName(colType,
-          scalarType, opName);
-      try {
-        Constructor ctor = getConstructor(className);
-        expr = (VectorExpression) ctor.newInstance(inputCol,
-            getScalarValue(constDesc));
-      } catch (Exception ex) {
-        throw new HiveException(ex);
-      }
-    } else if ((leftExpr instanceof ExprNodeConstantDesc) &&
-        (rightExpr instanceof ExprNodeColumnDesc)) {
-      ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
-      ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
-      int inputCol = getInputColumnIndex(rightColDesc.getColumn());
-      String colType = rightColDesc.getTypeString();
-      String scalarType = constDesc.getTypeString();
-      String className = getFilterScalarColumnExpressionClassName(colType,
-          scalarType, opName);
-      try {
-        //Constructor
-        expr = (VectorExpression) getConstructor(className).newInstance(inputCol,
-            getScalarValue(constDesc));
-      } catch (Exception ex) {
-        throw new HiveException(ex);
-      }
-    } else if ( (rightExpr instanceof ExprNodeColumnDesc) &&
-        (leftExpr instanceof ExprNodeColumnDesc) ) {
-      ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
-      ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
-      int inputCol1 = getInputColumnIndex(leftColDesc.getColumn());
-      int inputCol2 = getInputColumnIndex(rightColDesc.getColumn());
-      String colType1 = leftColDesc.getTypeString();
-      String colType2 = rightColDesc.getTypeString();
-      String className = getFilterColumnColumnExpressionClassName(colType1,
-          colType2, opName);
-      try {
-        expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2);
-      } catch (Exception ex) {
-        throw new HiveException(ex);
-      }
-    } else if ( (leftExpr instanceof ExprNodeGenericFuncDesc) &&
-        (rightExpr instanceof ExprNodeColumnDesc) ) {
-      v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr);
-      ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
-      int inputCol1 = v1.getOutputColumn();
-      int inputCol2 = getInputColumnIndex(rightColDesc.getColumn());
-      String colType1 = v1.getOutputType();
-      String colType2 = rightColDesc.getTypeString();
-      String className = getFilterColumnColumnExpressionClassName(colType1,
-          colType2, opName);
-      try {
-        expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2);
-      } catch (Exception ex) {
-        throw new HiveException(ex);
-      }
-      expr.setChildExpressions(new VectorExpression [] {v1});
-    } else if ( (leftExpr instanceof ExprNodeColumnDesc) &&
-        (rightExpr instanceof ExprNodeGenericFuncDesc) ) {
-      ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) leftExpr;
-      v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr);
-      int inputCol1 = getInputColumnIndex(rightColDesc.getColumn());
-      int inputCol2 = v2.getOutputColumn();
-      String colType1 = rightColDesc.getTypeString();
-      String colType2 = v2.getOutputType();
-      String className = getFilterColumnColumnExpressionClassName(colType1,
-          colType2, opName);
-      try {
-        expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2);
-      } catch (Exception ex) {
-        throw new HiveException(ex);
-      }
-      expr.setChildExpressions(new VectorExpression [] {v2});
-    } else if ( (leftExpr instanceof ExprNodeGenericFuncDesc) &&
-        (rightExpr instanceof ExprNodeConstantDesc) ) {
-      v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr);
-      ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
-      int inputCol1 = v1.getOutputColumn();
-      String colType1 = v1.getOutputType();
-      String scalarType = constDesc.getTypeString();
-      String className = getFilterColumnScalarExpressionClassName(colType1,
-          scalarType, opName);
-      try {
-        expr = (VectorExpression) getConstructor(className).newInstance(inputCol1,
-            getScalarValue(constDesc));
-      } catch (Exception ex) {
-        throw new HiveException(ex);
-      }
-      expr.setChildExpressions(new VectorExpression [] {v1});
-    } else if ( (leftExpr instanceof ExprNodeConstantDesc) &&
-        (rightExpr instanceof ExprNodeGenericFuncDesc) ) {
-      ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
-      v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr);
-      int inputCol2 = v2.getOutputColumn();
-      String scalarType = constDesc.getTypeString();
-      String colType = v2.getOutputType();
-      String className = getFilterScalarColumnExpressionClassName(colType,
-          scalarType, opName);
+  private Constructor getConstructor(String className) throws HiveException {
+    Class cl;
     try {
-        expr = (VectorExpression) getConstructor(className).newInstance(inputCol2,
-            getScalarValue(constDesc));
-      } catch (Exception ex) {
-        throw new HiveException(ex);
+      cl = Class.forName(className);
+    } catch (ClassNotFoundException e) {
+      throw new HiveException(e);
     }
-      expr.setChildExpressions(new VectorExpression [] {v2});
-    } else {
-      //For comparison expression, the child expressions must be materializing
-      //columns
-      v1 = getVectorExpression(leftExpr);
-      v2 = getVectorExpression(rightExpr);
-      int inputCol1 = v1.getOutputColumn();
-      int inputCol2 = v2.getOutputColumn();
-      String colType1 = v1.getOutputType();
-      String colType2 = v2.getOutputType();
-      String className = getFilterColumnColumnExpressionClassName(colType1,
-          colType2, opName);
-      try {
-        expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2);
-      } catch (Exception ex) {
-        throw new HiveException(ex);
-      }
-      expr.setChildExpressions(new VectorExpression [] {v1, v2});
-    }
-    if (v1 != null) {
-      ocm.freeOutputColumn(v1.getOutputColumn());
-    }
-    if (v2 != null) {
-      ocm.freeOutputColumn(v2.getOutputColumn());
-    }
-    return expr;
+    return getConstructor(cl);
   }
-  private Constructor getConstructor(String className) throws HiveException {
+  private Constructor getConstructor(Class cl) throws HiveException {
     try {
-      Class cl = Class.forName(className);
       Constructor [] ctors = cl.getDeclaredConstructors();
+      if (ctors.length == 1) {
+        return ctors[0];
+      }
       Constructor defaultCtor = cl.getConstructor();
       for (Constructor ctor : ctors) {
         if (!ctor.equals(defaultCtor)) {
@@ -1313,125 +1084,6 @@ private String getNormalizedTypeName(String colType) throws HiveException {
     return normalizedType;
   }
-  private String getFilterColumnColumnExpressionClassName(String colType1,
-      String colType2, String opName) throws HiveException {
-    StringBuilder b = new StringBuilder();
-    b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
-    if (opType.equals(OperatorType.FILTER)) {
-      b.append("Filter");
-    }
-    b.append(getNormalizedTypeName(colType1));
-    b.append("Col");
-    b.append(opName);
-    b.append(getNormalizedTypeName(colType2));
-    b.append("Column");
-    return b.toString();
-  }
-
-  private String getFilterColumnScalarExpressionClassName(String colType, String
-      scalarType, String opName) throws HiveException {
-    StringBuilder b = new StringBuilder();
-    b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
-    if (opType.equals(OperatorType.FILTER)) {
-      b.append("Filter");
-    }
-    b.append(getNormalizedTypeName(colType));
-    b.append("Col");
-    b.append(opName);
-    b.append(getNormalizedTypeName(scalarType));
-    b.append("Scalar");
-    return b.toString();
-  }
-
-  private String getFilterScalarColumnExpressionClassName(String colType, String
-      scalarType, String opName) throws HiveException {
-    StringBuilder b = new StringBuilder();
-    b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
-    if (opType.equals(OperatorType.FILTER)) {
-      b.append("Filter");
-    }
-    b.append(getNormalizedTypeName(scalarType));
-    b.append("Scalar");
-    b.append(opName);
-    b.append(getNormalizedTypeName(colType));
-    b.append("Column");
-    return b.toString();
-  }
-
-  private String getBinaryColumnScalarExpressionClassName(String colType,
-      String scalarType, String method) throws HiveException {
-    StringBuilder b = new StringBuilder();
-    String normColType = getNormalizedTypeName(colType);
-    String normScalarType = getNormalizedTypeName(scalarType);
-    if (normColType.equalsIgnoreCase("long") && normScalarType.equalsIgnoreCase("long")
-        && method.equalsIgnoreCase("divide")) {
-      b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.");
-    } else {
-      b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
-    }
-    b.append(normColType);
-    b.append("Col");
-    b.append(method);
-    b.append(normScalarType);
-    b.append("Scalar");
-    return b.toString();
-  }
-
-  private String getBinaryScalarColumnExpressionClassName(String colType,
-      String scalarType, String method) throws HiveException {
-    StringBuilder b = new StringBuilder();
-    String normColType = getNormalizedTypeName(colType);
-    String normScalarType = getNormalizedTypeName(scalarType);
-    if (normColType.equalsIgnoreCase("long") && normScalarType.equalsIgnoreCase("long")
-        && method.equalsIgnoreCase("divide")) {
-      b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.");
-    } else {
-      b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
-    }
-    b.append(normScalarType);
-    b.append("Scalar");
-    b.append(method);
-    b.append(normColType);
-    b.append("Column");
-    return b.toString();
-  }
-
-  private String getBinaryColumnColumnExpressionClassName(String colType1,
-      String colType2, String method) throws HiveException {
-    StringBuilder b = new StringBuilder();
-    String normColType1 = getNormalizedTypeName(colType1);
-    String normColType2 = getNormalizedTypeName(colType2);
-    if (normColType1.equalsIgnoreCase("long") && normColType2.equalsIgnoreCase("long")
-        && method.equalsIgnoreCase("divide")) {
-      b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.");
-    } else {
-      b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
-    }
-    b.append(normColType1);
-    b.append("Col");
-    b.append(method);
-    b.append(normColType2);
-    b.append("Column");
-    return b.toString();
-  }
-
-  private String getOutputColType(String inputType1, String inputType2, String method)
-      throws HiveException {
-    validateInputType(inputType1);
-    validateInputType(inputType2);
-    if (method.equalsIgnoreCase("divide") || inputType1.equalsIgnoreCase("double") ||
-        inputType2.equalsIgnoreCase("double") || inputType1.equalsIgnoreCase("float") ||
-        inputType2.equalsIgnoreCase("float")) {
-      return "double";
-    } else {
-      if (inputType1.equalsIgnoreCase("string") || inputType2.equalsIgnoreCase("string")) {
-        return "string";
-      } else {
-        return "long";
-      }
-    }
-  }
-
   private void validateInputType(String inputType) throws HiveException {
     if (! (inputType.equalsIgnoreCase("float") || inputType.equalsIgnoreCase("double") ||
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
index e6b511d..a2ad62c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
@@ -27,13 +27,6 @@
   private static final long serialVersionUID = 1L;
-  public FilterExprAndExpr(VectorExpression childExpr1, VectorExpression childExpr2) {
-    this();
-    this.childExpressions = new VectorExpression[2];
-    childExpressions[0] = childExpr1;
-    childExpressions[1] = childExpr2;
-  }
-
   public FilterExprAndExpr() {
     super();
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
index 703096c..7624f64 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
@@ -29,13 +29,6 @@
   private transient int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
   private transient final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
-  public FilterExprOrExpr(VectorExpression childExpr1, VectorExpression childExpr2) {
-    this();
-    this.childExpressions = new VectorExpression[2];
-    childExpressions[0] = childExpr1;
-    childExpressions[1] = childExpr2;
-  }
-
   public FilterExprOrExpr() {
     super();
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java
deleted file mode 100644
index cdf404c..0000000
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-/**
- * This class represents an NOT filter expression. This applies short circuit optimization.
- */
-public class FilterNotExpr extends VectorExpression {
-  private static final long serialVersionUID = 1L;
-  private transient final int[] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
-  private transient int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
-  private transient final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
-
-  public FilterNotExpr(VectorExpression childExpr1) {
-    this();
-    this.childExpressions = new VectorExpression[] {childExpr1};
-  }
-
-  public FilterNotExpr() {
-    super();
-  }
-
-  @Override
-  public void evaluate(VectorizedRowBatch batch) {
-    int n = batch.size;
-
-    if (n <= 0) {
-      return;
-    }
-
-    // Clone the selected vector
-    int[] sel = batch.selected;
-    if (batch.selectedInUse) {
-      System.arraycopy(sel, 0, initialSelected, 0, n);
-    } else {
-      for (int i = 0; i < n; i++) {
-        initialSelected[i] = i;
-        sel[i] = i;
-      }
-      batch.selectedInUse = true;
-    }
-
-    VectorExpression childExpr1 = this.childExpressions[0];
-    childExpr1.evaluate(batch);
-
-    // Calculate unselected ones in last evaluate.
-    for (int i = 0; i < n; i++) {
-      tmp[initialSelected[i]] = 0;
-    }
-
-    // Need to set sel reference again, because the child expression might
-    // have invalidated the earlier reference
-    sel = batch.selected;
-    for (int j = 0; j < batch.size; j++) {
-      int i = sel[j];
-      tmp[i] = 1;
-    }
-    int unselectedSize = 0;
-    for (int j = 0; j < n; j++) {
-      int i = initialSelected[j];
-      if (tmp[i] == 0) {
-        unselected[unselectedSize++] = i;
-      }
-    }
-
-    // The unselected is the new selected, swap the arrays
-    batch.selected = unselected;
-    unselected = sel;
-    batch.size = unselectedSize;
-  }
-
-  @Override
-  public int getOutputColumn() {
-    return -1;
-  }
-
-  @Override
-  public String getOutputType() {
-    return "boolean";
-  }
-}
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
index c877cef..24a52a6 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
@@ -96,7 +96,8 @@ public void testBasicFilterOperator() throws HiveException {
     VectorFilterOperator vfo = getAVectorFilterOperator();
     VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1);
     VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0);
-    VectorExpression ve3 = new FilterExprAndExpr(ve1,ve2);
+    VectorExpression ve3 = new FilterExprAndExpr();
+    ve3.setChildExpressions(new VectorExpression[] {ve1, ve2});
     vfo.setFilterCondition(ve3);
     FakeDataReader fdr = new FakeDataReader(1024*1, 3);
@@ -123,7 +124,8 @@ public void testBasicFilterLargeData() throws HiveException {
     VectorFilterOperator vfo = getAVectorFilterOperator();
     VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1);
     VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0);
-    VectorExpression ve3 = new FilterExprAndExpr(ve1,ve2);
+    VectorExpression ve3 = new FilterExprAndExpr();
+    ve3.setChildExpressions(new VectorExpression[] {ve1, ve2});
     vfo.setFilterCondition(ve3);
     FakeDataReader fdr = new FakeDataReader(16*1024*1024, 3);
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index 14b04c5..0b7d5ac 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
@@ -26,8 +26,17 @@
 import java.util.List;
 import java.util.Map;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NotCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsFalse;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColUnaryMinus;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColLessDoubleScalar;
@@ -37,6 +46,8 @@
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterStringColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterStringScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColEqualLongScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColGreaterLongScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloLongColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColMultiplyLongColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColSubtractLongColumn;
@@ -59,6 +70,9 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.junit.Test;
@@ -78,14 +92,19 @@ public void testArithmeticExpressionVectorization() throws HiveException {
     GenericUDFBridge udf5 = new GenericUDFBridge("%", true, UDFOPMod.class.getCanonicalName());
     ExprNodeGenericFuncDesc sumExpr = new ExprNodeGenericFuncDesc();
+    sumExpr.setTypeInfo(TypeInfoFactory.intTypeInfo);
     sumExpr.setGenericUDF(udf1);
     ExprNodeGenericFuncDesc minusExpr = new ExprNodeGenericFuncDesc();
+    minusExpr.setTypeInfo(TypeInfoFactory.intTypeInfo);
     minusExpr.setGenericUDF(udf2);
     ExprNodeGenericFuncDesc multiplyExpr = new ExprNodeGenericFuncDesc();
+    multiplyExpr.setTypeInfo(TypeInfoFactory.intTypeInfo);
     multiplyExpr.setGenericUDF(udf3);
     ExprNodeGenericFuncDesc sum2Expr = new ExprNodeGenericFuncDesc();
+    sum2Expr.setTypeInfo(TypeInfoFactory.intTypeInfo);
     sum2Expr.setGenericUDF(udf4);
     ExprNodeGenericFuncDesc modExpr = new ExprNodeGenericFuncDesc();
+    modExpr.setTypeInfo(TypeInfoFactory.intTypeInfo);
     modExpr.setGenericUDF(udf5);
     ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false);
@@ -230,12 +249,13 @@ public void testFloatInExpressions() throws HiveException {
   }
   @Test
-  public void testVectorizeAndOrExpression() throws HiveException {
+  public void testVectorizeFilterAndOrExpression() throws HiveException {
     ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
     ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
     GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
     ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
+    greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
     greaterExprDesc.setGenericUDF(udf);
     List children1 = new ArrayList(2);
     children1.add(col1Expr);
@@ -247,6 +267,7 @@ public void testVectorizeAndOrExpression() throws HiveException {
     GenericUDFOPLessThan udf2 = new GenericUDFOPLessThan();
     ExprNodeGenericFuncDesc lessExprDesc = new ExprNodeGenericFuncDesc();
+    lessExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
     lessExprDesc.setGenericUDF(udf2);
     List children2 = new ArrayList(2);
     children2.add(col2Expr);
@@ -255,6 +276,7 @@ public void testVectorizeAndOrExpression() throws HiveException {
     GenericUDFOPAnd andUdf = new GenericUDFOPAnd();
     ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
+    andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
     andExprDesc.setGenericUDF(andUdf);
     List children3 = new ArrayList(2);
     children3.add(greaterExprDesc);
@@ -276,6 +298,7 @@ public void testVectorizeAndOrExpression() throws HiveException {
     GenericUDFOPOr orUdf = new GenericUDFOPOr();
     ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
+    orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
     orExprDesc.setGenericUDF(orUdf);
     List children4 = new ArrayList(2);
     children4.add(greaterExprDesc);
@@ -291,6 +314,203 @@ public void testVectorizeAndOrExpression() throws HiveException {
   }
   @Test
+  public void testVectorizeAndOrProjectionExpression() throws HiveException {
+    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
+    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
+
+    GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
+    ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
+    greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+    greaterExprDesc.setGenericUDF(udf);
+    List children1 = new ArrayList(2);
+    children1.add(col1Expr);
+    children1.add(constDesc);
+    greaterExprDesc.setChildExprs(children1);
+
+    ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false);
+
+    GenericUDFOPAnd andUdf = new GenericUDFOPAnd();
+    ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
+    andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+    andExprDesc.setGenericUDF(andUdf);
+    List children3 = new ArrayList(2);
+    children3.add(greaterExprDesc);
+    children3.add(col2Expr);
+    andExprDesc.setChildExprs(children3);
+
+    Map columnMap = new HashMap();
+    columnMap.put("col1", 0);
+    columnMap.put("col2", 1);
+
+    VectorizationContext vc = new VectorizationContext(columnMap, 2);
+    vc.setOperatorType(OperatorType.FILTER);
+    VectorExpression veAnd = vc.getVectorExpression(andExprDesc);
+    assertEquals(veAnd.getClass(), FilterExprAndExpr.class);
+    assertEquals(veAnd.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
+    assertEquals(veAnd.getChildExpressions()[1].getClass(), SelectColumnIsTrue.class);
+
+    vc.setOperatorType(OperatorType.SELECT);
+    veAnd = vc.getVectorExpression(andExprDesc);
+    assertEquals(veAnd.getClass(), ColAndCol.class);
+    assertEquals(1, veAnd.getChildExpressions().length);
+    assertEquals(veAnd.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
+    assertEquals(2, ((ColAndCol) veAnd).getColNum1());
+    assertEquals(1, ((ColAndCol) veAnd).getColNum2());
+    assertEquals(3, ((ColAndCol) veAnd).getOutputColumn());
+
+    //OR
+    GenericUDFOPOr orUdf = new GenericUDFOPOr();
+    ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
+    orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+    orExprDesc.setGenericUDF(orUdf);
+    List children4 = new ArrayList(2);
+    children4.add(greaterExprDesc);
+    children4.add(col2Expr);
+    orExprDesc.setChildExprs(children4);
+
+    //Allocate new Vectorization context to reset the intermediate columns.
+    vc = new VectorizationContext(columnMap, 2);
+    vc.setOperatorType(OperatorType.FILTER);
+    VectorExpression veOr = vc.getVectorExpression(orExprDesc);
+    assertEquals(veOr.getClass(), FilterExprOrExpr.class);
+    assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
+    assertEquals(veOr.getChildExpressions()[1].getClass(), SelectColumnIsTrue.class);
+
+    vc.setOperatorType(OperatorType.SELECT);
+    veOr = vc.getVectorExpression(orExprDesc);
+    assertEquals(veOr.getClass(), ColOrCol.class);
+    assertEquals(1, veAnd.getChildExpressions().length);
+    assertEquals(veAnd.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
+    assertEquals(2, ((ColOrCol) veOr).getColNum1());
+    assertEquals(1, ((ColOrCol) veOr).getColNum2());
+    assertEquals(3, ((ColOrCol) veOr).getOutputColumn());
+  }
+
+  @Test
+  public void testNotExpression() throws HiveException {
+    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
+    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
+
+    GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
+    ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
+    greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+    greaterExprDesc.setGenericUDF(udf);
+    List children1 = new ArrayList(2);
+    children1.add(col1Expr);
+    children1.add(constDesc);
+    greaterExprDesc.setChildExprs(children1);
+
+    ExprNodeGenericFuncDesc notExpr = new ExprNodeGenericFuncDesc();
+    notExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+    GenericUDFOPNot notUdf = new GenericUDFOPNot();
+    notExpr.setGenericUDF(notUdf);
+    List childOfNot = new ArrayList();
+    childOfNot.add(greaterExprDesc);
+    notExpr.setChildExprs(childOfNot);
+
+    Map columnMap = new HashMap();
+    columnMap.put("col1", 0);
+    columnMap.put("col2", 1);
+
+    VectorizationContext vc = new VectorizationContext(columnMap, 2);
+    vc.setOperatorType(OperatorType.FILTER);
+
+    VectorExpression ve = vc.getVectorExpression(notExpr);
+
+    assertEquals(ve.getClass(), SelectColumnIsFalse.class);
+    assertEquals(ve.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
+
+    vc.setOperatorType(OperatorType.SELECT);
+    ve = vc.getVectorExpression(notExpr);
+    assertEquals(ve.getClass(), NotCol.class);
+    assertEquals(ve.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
+  }
+
+  @Test
+  public void testNullExpressions() throws HiveException {
+    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
+    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
+
+    GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
+    ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
+    greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+    greaterExprDesc.setGenericUDF(udf);
+    List children1 = new ArrayList(2);
+    children1.add(col1Expr);
+    children1.add(constDesc);
+    greaterExprDesc.setChildExprs(children1);
+
+    ExprNodeGenericFuncDesc isNullExpr = new ExprNodeGenericFuncDesc();
+    isNullExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+    GenericUDFOPNull notUdf = new GenericUDFOPNull();
+    isNullExpr.setGenericUDF(notUdf);
+    List childOfNot = new ArrayList();
+    childOfNot.add(greaterExprDesc);
+    isNullExpr.setChildExprs(childOfNot);
+
+    Map columnMap = new HashMap();
+    columnMap.put("col1", 0);
+    columnMap.put("col2", 1);
+
+    VectorizationContext vc = new VectorizationContext(columnMap, 2);
+    vc.setOperatorType(OperatorType.FILTER);
+
+    VectorExpression ve = vc.getVectorExpression(isNullExpr);
+
+    assertEquals(ve.getClass(), SelectColumnIsNull.class);
+    assertEquals(2, ((SelectColumnIsNull) ve).getColNum());
+    assertEquals(ve.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
+
+    vc.setOperatorType(OperatorType.SELECT);
+    ve = vc.getVectorExpression(isNullExpr);
+    assertEquals(ve.getClass(), IsNull.class);
+    assertEquals(2, ((IsNull) ve).getColNum());
+    assertEquals(ve.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
+  }
+
+  @Test
+  public void testNotNullExpressions() throws HiveException {
+    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
+    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
+
+    GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
+    ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
+    greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+    greaterExprDesc.setGenericUDF(udf);
+    List children1 = new ArrayList(2);
+    children1.add(col1Expr);
+    children1.add(constDesc);
+    greaterExprDesc.setChildExprs(children1);
+
+    ExprNodeGenericFuncDesc isNotNullExpr = new ExprNodeGenericFuncDesc();
+    isNotNullExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+    GenericUDFOPNotNull notNullUdf = new GenericUDFOPNotNull();
+    isNotNullExpr.setGenericUDF(notNullUdf);
+    List childOfNot = new ArrayList();
+    childOfNot.add(greaterExprDesc);
+    isNotNullExpr.setChildExprs(childOfNot);
+
+    Map columnMap = new HashMap();
+    columnMap.put("col1", 0);
+    columnMap.put("col2", 1);
+
+    VectorizationContext vc = new VectorizationContext(columnMap, 2);
+    vc.setOperatorType(OperatorType.FILTER);
+
+    VectorExpression ve = vc.getVectorExpression(isNotNullExpr);
+
+    assertEquals(ve.getClass(), SelectColumnIsNotNull.class);
+    assertEquals(2, ((SelectColumnIsNotNull) ve).getColNum());
+    assertEquals(ve.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
+
+    vc.setOperatorType(OperatorType.SELECT);
+    ve = vc.getVectorExpression(isNotNullExpr);
+    assertEquals(ve.getClass(), IsNotNull.class);
+    assertEquals(2, ((IsNotNull) ve).getColNum());
+    assertEquals(ve.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
+  }
+
+  @Test
   public void testVectorizeScalarColumnExpression() throws HiveException {
     ExprNodeGenericFuncDesc scalarMinusConstant = new ExprNodeGenericFuncDesc();
     GenericUDF gudf = new GenericUDFBridge("-", true, UDFOPMinus.class.getCanonicalName());
@@ -425,4 +645,37 @@ public void testFilterBooleanColumnCompareBooleanScalar() throws HiveException {
     VectorExpression ve = vc.getVectorExpression(colEqualScalar);
     assertEquals(FilterLongColEqualLongScalar.class, ve.getClass());
   }
+
+  @Test
+  public void testBooleanColumnCompareBooleanScalar() throws HiveException {
+    ExprNodeGenericFuncDesc colEqualScalar = new ExprNodeGenericFuncDesc();
+    GenericUDFOPEqual gudf = new GenericUDFOPEqual();
+    colEqualScalar.setGenericUDF(gudf);
+    List children = new ArrayList(2);
+    ExprNodeConstantDesc constDesc =
+        new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, 20);
+    ExprNodeColumnDesc colDesc =
+        new ExprNodeColumnDesc(Boolean.class, "a", "table", false);
+
+    children.add(colDesc);
+    children.add(constDesc);
+
+    colEqualScalar.setChildExprs(children);
+
+    Map columnMap = new HashMap();
+    columnMap.put("a", 0);
+    VectorizationContext vc = new VectorizationContext(columnMap, 2);
+    vc.setOperatorType(OperatorType.SELECT);
+    VectorExpression ve = vc.getVectorExpression(colEqualScalar);
+    assertEquals(LongColEqualLongScalar.class, ve.getClass());
+  }
+
+
+  @Test
+  public void testVectorExpressionMapping() {
+    // Test whether VectorExpressionMappings initializes, exception will be thrown
+    // and the test will fail if there is a failure in initialization.
+    @SuppressWarnings("unused")
+    VectorExpressionMappings vem = new VectorExpressionMappings();
+  }
 }
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
index da67c63..d603739 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
@@ -347,7 +347,8 @@ public void testFilterExprOrExpr() {
     SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
     SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
-    FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2);
+    FilterExprOrExpr orExpr = new FilterExprOrExpr();
+    orExpr.setChildExpressions(new VectorExpression[] {expr1, expr2});
     orExpr.evaluate(batch1);
     orExpr.evaluate(batch2);
@@ -386,7 +387,8 @@ public void testFilterExprOrExprWithBatchReuse() {
     SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
     SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
-    FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2);
+    FilterExprOrExpr orExpr = new FilterExprOrExpr();
+    orExpr.setChildExpressions(new VectorExpression[] {expr1, expr2});
     orExpr.evaluate(batch1);
@@ -419,7 +421,8 @@ public void testFilterExprOrExprWithSelectInUse() {
     SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
     SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
-    FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2);
+    FilterExprOrExpr orExpr = new FilterExprOrExpr();
+    orExpr.setChildExpressions(new VectorExpression[] {expr1, expr2});
     // Evaluate batch1 so that temporary arrays in the expression
     // have residual values to interfere in later computation
@@ -448,9 +451,10 @@ public void testFilterExprAndExpr() {
     SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0);
     SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1);
-    FilterExprAndExpr orExpr = new FilterExprAndExpr(expr1, expr2);
+    FilterExprAndExpr andExpr = new FilterExprAndExpr();
+    andExpr.setChildExpressions(new VectorExpression[] {expr1, expr2});
-    orExpr.evaluate(batch1);
+    andExpr.evaluate(batch1);
     assertEquals(1, batch1.size);
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
index 28123a8..3f2c9a8 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
@@ -239,7 +239,7 @@ public void testStringScalarCompareStringColProjection() {
     VectorizedRowBatch batch = makeStringBatch();
     VectorExpression expr;
-    expr = new StringScalarEqualStringColumn(0, red2, 2);
+    expr = new StringScalarEqualStringColumn(red2, 0, 2);
     expr.evaluate(batch);
     Assert.assertEquals(3, batch.size);
     LongColumnVector outVector = (LongColumnVector) batch.cols[2];
@@ -248,7 +248,7 @@ public void testStringScalarCompareStringColProjection() {
     Assert.assertEquals(0, outVector.vector[2]);
     batch = makeStringBatch();
-    expr = new StringScalarEqualStringColumn(0, green, 2);
+    expr = new StringScalarEqualStringColumn(green, 0, 2);
     expr.evaluate(batch);
     Assert.assertEquals(3, batch.size);
     outVector = (LongColumnVector) batch.cols[2];
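
For reference, the construction pattern the updated tests rely on after this patch is the no-argument filter constructor plus an explicit setChildExpressions(...) call. A minimal sketch follows; the wrapper class FilterWiringSketch, the applyFilter method, and the batch/column layout (long columns 0 and 1, double column 2) are illustrative assumptions and not part of the patch:

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn;

public class FilterWiringSketch {
  public static void applyFilter(VectorizedRowBatch batch) {
    // Filter predicate col0 > col1 over the batch's selected rows.
    VectorExpression greater = new FilterLongColGreaterLongColumn(0, 1);
    // Filter predicate col2 == 0.0.
    VectorExpression equal = new FilterLongColEqualDoubleScalar(2, 0);
    // The AND node now takes no constructor arguments; children are attached
    // explicitly, exactly as the updated tests in this patch do.
    VectorExpression andExpr = new FilterExprAndExpr();
    andExpr.setChildExpressions(new VectorExpression[] {greater, equal});
    // After evaluation, batch.selected/batch.size reflect rows passing both predicates.
    andExpr.evaluate(batch);
  }
}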