diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index d213731..0499672 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -477,7 +477,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode); } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; - if (isCustomUDF(expr) || isNonVectorizedPathUDF(expr, mode)) { + if (isCustomUDF(expr)) { ve = getCustomUDFExpression(expr); } else { @@ -489,6 +489,12 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th exprDesc.getChildren(), exprDesc.getTypeInfo()); ve = getGenericUdfVectorExpression(expr.getGenericUDF(), childExpressions, mode, exprDesc.getTypeInfo()); + if (ve == null) { + /* + * Ok, no vectorized class available. No problem -- try to use the VectorUDFAdaptor. + */ + ve = getCustomUDFExpression(expr); + } } } else if (exprDesc instanceof ExprNodeConstantDesc) { ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), @@ -758,64 +764,6 @@ private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException return genericUdf; } - - /* Return true if this is one of a small set of functions for which - * it is significantly easier to use the old code path in vectorized - * mode instead of implementing a new, optimized VectorExpression. - * - * Depending on performance requirements and frequency of use, these - * may be implemented in the future with an optimized VectorExpression. 
- */ - public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr, Mode mode) { - GenericUDF gudf = expr.getGenericUDF(); - if (gudf instanceof GenericUDFBridge) { - GenericUDFBridge bridge = (GenericUDFBridge) gudf; - Class udfClass = bridge.getUdfClass(); - if (udfClass.equals(UDFHex.class) - || udfClass.equals(UDFRegExpExtract.class) - || udfClass.equals(UDFRegExpReplace.class) - || udfClass.equals(UDFConv.class) - || udfClass.equals(UDFFromUnixTime.class) && isIntFamily(arg0Type(expr)) - || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr)) - || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr)) - || udfClass.equals(UDFToString.class) && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } - } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr))) - - /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because - * of their complexity and generality. In the future, variations of these - * can be optimized to run faster for the vectorized code path. For example, - * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END - * is an example of a GenericUDFCase that has all constant arguments - * except for the first argument. This is probably a common case and a - * good candidate for a fast, special-purpose VectorExpression. Then - * the UDF Adaptor code path could be used as a catch-all for - * non-optimized general cases. 
- */ - || gudf instanceof GenericUDFCase - || gudf instanceof GenericUDFWhen) { - return true; - } else if (gudf instanceof GenericUDFToChar && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } else if (gudf instanceof GenericUDFToVarchar && - (arg0Type(expr).equals("timestamp") - || arg0Type(expr).equals("double") - || arg0Type(expr).equals("float"))) { - return true; - } else if (gudf instanceof GenericUDFBetween && (mode == Mode.PROJECTION)) { - // between has 4 args here, but can be vectorized like this - return true; - } - return false; - } - public static boolean isCastToIntFamily(Class udfClass) { return udfClass.equals(UDFToByte.class) || udfClass.equals(UDFToShort.class) @@ -1213,36 +1161,38 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, List castedChildren = evaluateCastOnConstants(childExpr); childExpr = castedChildren; - //First handle special cases + //First handle special cases. If one of the special case methods cannot handle it, + // it returns null. + VectorExpression ve = null; if (udf instanceof GenericUDFBetween && mode == Mode.FILTER) { - return getBetweenFilterExpression(childExpr, mode, returnType); + ve = getBetweenFilterExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { - return getInExpression(childExpr, mode, returnType); + ve = getInExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFOPPositive) { - return getIdentityExpression(childExpr); + ve = getIdentityExpression(childExpr); } else if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl) { // Coalesce is a special case because it can take variable number of arguments. // Nvl is a specialization of the Coalesce. 
- return getCoalesceExpression(childExpr, returnType); + ve = getCoalesceExpression(childExpr, returnType); } else if (udf instanceof GenericUDFElt) { // Elt is a special case because it can take variable number of arguments. - return getEltExpression(childExpr, returnType); + ve = getEltExpression(childExpr, returnType); } else if (udf instanceof GenericUDFBridge) { - VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, + ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, returnType); - if (v != null) { - return v; - } } else if (udf instanceof GenericUDFToDecimal) { - return getCastToDecimal(childExpr, returnType); + ve = getCastToDecimal(childExpr, returnType); } else if (udf instanceof GenericUDFToChar) { - return getCastToChar(childExpr, returnType); + ve = getCastToChar(childExpr, returnType); } else if (udf instanceof GenericUDFToVarchar) { - return getCastToVarChar(childExpr, returnType); + ve = getCastToVarChar(childExpr, returnType); } else if (udf instanceof GenericUDFTimestamp) { - return getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); + ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType); + } + if (ve != null) { + return ve; } // Now do a general lookup Class udfClass = udf.getClass(); @@ -1252,13 +1202,9 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, isSubstituted = true; } - VectorExpression ve = getVectorExpressionForUdf((!isSubstituted ? udf : null), + ve = getVectorExpressionForUdf((!isSubstituted ? 
udf : null), udfClass, castedChildren, mode, returnType); - if (ve == null) { - throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported"); - } - return ve; } @@ -1623,16 +1569,20 @@ private VectorExpression getInExpression(List childExpr, Mode mode private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf, List childExpr, Mode mode, TypeInfo returnType) throws HiveException { Class cl = udf.getUdfClass(); + VectorExpression ve = null; if (isCastToIntFamily(cl)) { - return getCastToLongExpression(childExpr); + ve = getCastToLongExpression(childExpr); } else if (cl.equals(UDFToBoolean.class)) { - return getCastToBoolean(childExpr); + ve = getCastToBoolean(childExpr); } else if (isCastToFloatFamily(cl)) { - return getCastToDoubleExpression(cl, childExpr, returnType); + ve = getCastToDoubleExpression(cl, childExpr, returnType); } else if (cl.equals(UDFToString.class)) { - return getCastToString(childExpr, returnType); + ve = getCastToString(childExpr, returnType); } - return null; + if (ve == null && childExpr instanceof ExprNodeGenericFuncDesc) { + ve = getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr); + } + return ve; } private HiveDecimal castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException { @@ -1762,10 +1712,10 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo returnType); } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringToDecimal.class, childExpr, Mode.PROJECTION, returnType); - } else if (isDatetimeFamily(inputType)) { + } else if (inputType.equals("timestamp")) { return createVectorExpression(CastTimestampToDecimal.class, childExpr, Mode.PROJECTION, returnType); } - throw new HiveException("Unhandled cast input type: " + inputType); + return null; /* No vectorized cast for this input type; a null result lets the caller fall back to the VectorUDFAdaptor. */ } private VectorExpression getCastToString(List childExpr, TypeInfo returnType) @@ -1790,11 +1740,7 @@ private VectorExpression getCastToString(List childExpr, TypeInfo } else if 
(isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToString.class, childExpr, Mode.PROJECTION, returnType); } - /* The string type is deliberately omitted -- the planner removes string to string casts. - * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. - */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToChar(List childExpr, TypeInfo returnType) @@ -1818,12 +1764,7 @@ private VectorExpression getCastToChar(List childExpr, TypeInfo re } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToChar.class, childExpr, Mode.PROJECTION, returnType); } - - /* - * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. - */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToVarChar(List childExpr, TypeInfo returnType) @@ -1847,12 +1788,7 @@ private VectorExpression getCastToVarChar(List childExpr, TypeInfo } else if (isStringFamily(inputType)) { return createVectorExpression(CastStringGroupToVarChar.class, childExpr, Mode.PROJECTION, returnType); } - - /* - * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. 
- */ - - throw new HiveException("Unhandled cast input type: " + inputType); + return null; } private VectorExpression getCastToDoubleExpression(Class udf, List childExpr, @@ -1875,8 +1811,6 @@ private VectorExpression getCastToDoubleExpression(Class udf, List childExpr) ocm.freeOutputColumn(lenExpr.getOutputColumn()); return lenToBoolExpr; } - // cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op - return null; } @@ -1926,8 +1858,6 @@ private VectorExpression getCastToLongExpression(List childExpr) // integer and boolean types require no conversion, so use a no-op return getIdentityExpression(childExpr); } - // string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF. - return null; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java index 9e0159c..aef46da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java @@ -135,6 +135,12 @@ public void setChildren(List children) { public String toString() { StringBuilder sb = new StringBuilder(); sb.append(genericUDF.getClass().getSimpleName()); + if (genericUDF instanceof GenericUDFBridge) { + GenericUDFBridge genericUDFBridge = (GenericUDFBridge) genericUDF; + sb.append(" ==> "); + sb.append(genericUDFBridge.getUdfName()); + sb.append(" "); + } sb.append("("); if (chidren != null) { for (int i = 0; i < chidren.size(); i++) { diff --git ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out index 54bad12..bcf1ab6 100644 --- ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out @@ -2156,6 +2156,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out index b7ddf73..de8ce7f 100644 --- ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out @@ -145,6 +145,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_between_columns.q.out ql/src/test/results/clientpositive/vector_between_columns.q.out index a4e8d64..c5365a6 100644 --- ql/src/test/results/clientpositive/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -134,6 +134,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_decimal_udf.q.out ql/src/test/results/clientpositive/vector_decimal_udf.q.out index 9dea502..b99fd10 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf.q.out @@ -2085,6 +2085,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git 
ql/src/test/results/clientpositive/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/vector_decimal_udf2.q.out index 805584a..4e24fa6 100644 --- ql/src/test/results/clientpositive/vector_decimal_udf2.q.out +++ ql/src/test/results/clientpositive/vector_decimal_udf2.q.out @@ -139,6 +139,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vector_udf1.q.out ql/src/test/results/clientpositive/vector_udf1.q.out index bb02ea7..232d78e 100644 --- ql/src/test/results/clientpositive/vector_udf1.q.out +++ ql/src/test/results/clientpositive/vector_udf1.q.out @@ -766,6 +766,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator