diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 4678d6e..b24810b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -72,7 +72,9 @@ import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; import org.apache.hadoop.hive.ql.udf.UDFHour; +import org.apache.hadoop.hive.ql.udf.UDFLength; import org.apache.hadoop.hive.ql.udf.UDFLike; +import org.apache.hadoop.hive.ql.udf.UDFLower; import org.apache.hadoop.hive.ql.udf.UDFMinute; import org.apache.hadoop.hive.ql.udf.UDFMonth; import org.apache.hadoop.hive.ql.udf.UDFOPDivide; @@ -83,6 +85,7 @@ import org.apache.hadoop.hive.ql.udf.UDFOPPlus; import org.apache.hadoop.hive.ql.udf.UDFOPPositive; import org.apache.hadoop.hive.ql.udf.UDFSecond; +import org.apache.hadoop.hive.ql.udf.UDFUpper; import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -453,21 +456,80 @@ private VectorExpression getVectorExpression(GenericUDFBridge udf, return getTimestampFieldExpression(cl.getSimpleName(), childExpr); } else if (cl.equals(UDFLike.class)) { return getLikeExpression(childExpr); + } else if (cl.equals(UDFLower.class)) { + return getUnaryStringExpression("StringLower", "String", childExpr); + } else if (cl.equals(UDFUpper.class)) { + return getUnaryStringExpression("StringUpper", "String", childExpr); + } else if (cl.equals(UDFLength.class)) { + return getUnaryStringExpression("StringLength", "Long", childExpr); } throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported"); } + /* Return a unary string vector expression. This is used for functions like + * UPPER() and LOWER(). + */ + private VectorExpression getUnaryStringExpression(String vectorExprClassName, + String resultType, // result type name + List childExprList) throws HiveException { + + /* Create an instance of the class vectorExprClassName for the input column or expression result + * and return it. + */ + + ExprNodeDesc childExpr = childExprList.get(0); + int inputCol; + VectorExpression v1 = null; + if (childExpr instanceof ExprNodeGenericFuncDesc) { + v1 = getVectorExpression(childExpr); + inputCol = v1.getOutputColumn(); + } else if (childExpr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr; + inputCol = getInputColumnIndex(colDesc.getColumn()); + } else { + // TODO? add code to handle constant argument case + throw new HiveException("Expression not supported: "+childExpr); + } + String outputColumnType = getNormalizedTypeName(resultType); + int outputCol = ocm.allocateOutputColumn(outputColumnType); + String className = "org.apache.hadoop.hive.ql.exec.vector.expressions." + + vectorExprClassName; + VectorExpression expr; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol, outputCol); + } catch (Exception ex) { + throw new HiveException(ex); + } + if (v1 != null) { + expr.setChildExpressions(new VectorExpression [] {v1}); + ocm.freeOutputColumn(v1.getOutputColumn()); + } + return expr; + } + private VectorExpression getLikeExpression(List childExpr) throws HiveException { ExprNodeDesc leftExpr = childExpr.get(0); ExprNodeDesc rightExpr = childExpr.get(1); + VectorExpression v1 = null; VectorExpression expr = null; + int inputCol; + ExprNodeConstantDesc constDesc; + if ((leftExpr instanceof ExprNodeColumnDesc) && (rightExpr instanceof ExprNodeConstantDesc) ) { ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr; - ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr; - int inputCol = getInputColumnIndex(leftColDesc.getColumn()); + constDesc = (ExprNodeConstantDesc) rightExpr; + inputCol = getInputColumnIndex(leftColDesc.getColumn()); + expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol, + new Text((byte[]) getScalarValue(constDesc))); + } else if ((leftExpr instanceof ExprNodeGenericFuncDesc) && + (rightExpr instanceof ExprNodeConstantDesc)) { + v1 = getVectorExpression(leftExpr); + inputCol = v1.getOutputColumn(); + constDesc = (ExprNodeConstantDesc) rightExpr; expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol, new Text((byte[]) getScalarValue(constDesc))); } @@ -475,6 +537,10 @@ private VectorExpression getLikeExpression(List childExpr) throws if (expr == null) { throw new HiveException("Vector LIKE filter expression could not be initialized"); } + if (v1 != null) { + expr.setChildExpressions(new VectorExpression [] {v1}); + ocm.freeOutputColumn(v1.getOutputColumn()); + } return expr; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java index dbef635..3779091 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java @@ -264,6 +264,11 @@ private void parseSimplePattern(String likePattern) { @Override public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; boolean[] nullPos = inputColVector.isNull; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java index 05e1f40..32b4d4b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java @@ -26,7 +26,7 @@ private int colNum; private int outputColumn; - StringLength (int colNum, int outputColumn) { + public StringLength (int colNum, int outputColumn) { this.colNum = colNum; this.outputColumn = outputColumn; } @@ -34,6 +34,11 @@ // Calculate the length of the UTF-8 strings in input vector and place results in output vector. @Override public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; int[] sel = batch.selected; @@ -127,7 +132,7 @@ public int getOutputColumn() { @Override public String getOutputType() { - return "String"; + return "Long"; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java index 5a9694f..19e14f3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java @@ -21,7 +21,7 @@ import org.apache.hadoop.hive.ql.udf.IUDFUnaryString; public class StringLower extends StringUnaryUDF { - StringLower(int colNum, int outputColumn) { + public StringLower(int colNum, int outputColumn) { super(colNum, outputColumn, (IUDFUnaryString) new UDFLower()); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java index 503011f..5b66d86 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java @@ -38,6 +38,11 @@ @Override public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; int n = batch.size; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java index bf1c19e..126d183 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.udf.IUDFUnaryString; public class StringUpper extends StringUnaryUDF { - StringUpper(int colNum, int outputColumn) { + public StringUpper(int colNum, int outputColumn) { super(colNum, outputColumn, (IUDFUnaryString) new UDFUpper()); } }