diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index d213731..8e2f5ef 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -477,7 +477,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th
       ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode);
     } else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
       ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
-      if (isCustomUDF(expr) || isNonVectorizedPathUDF(expr, mode)) {
+      if (isCustomUDF(expr)) {
         ve = getCustomUDFExpression(expr);
       } else {
@@ -489,6 +489,12 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th
             exprDesc.getChildren(), exprDesc.getTypeInfo());
         ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
             childExpressions, mode, exprDesc.getTypeInfo());
+        if (ve == null) {
+          /*
+           * We don't have a vectorized version, so try to use the VectorUDFAdaptor.
+           */
+          ve = getCustomUDFExpression(expr);
+        }
       }
     } else if (exprDesc instanceof ExprNodeConstantDesc) {
       ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(),
@@ -1214,35 +1220,36 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
     childExpr = castedChildren;

     //First handle special cases
+    VectorExpression ve = null;
     if (udf instanceof GenericUDFBetween && mode == Mode.FILTER) {
-      return getBetweenFilterExpression(childExpr, mode, returnType);
+      ve = getBetweenFilterExpression(childExpr, mode, returnType);
     } else if (udf instanceof GenericUDFIn) {
-      return getInExpression(childExpr, mode, returnType);
+      ve = getInExpression(childExpr, mode, returnType);
     } else if (udf instanceof GenericUDFOPPositive) {
-      return getIdentityExpression(childExpr);
+      ve = getIdentityExpression(childExpr);
     } else if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl) {

       // Coalesce is a special case because it can take variable number of arguments.
       // Nvl is a specialization of the Coalesce.
-      return getCoalesceExpression(childExpr, returnType);
+      ve = getCoalesceExpression(childExpr, returnType);
     } else if (udf instanceof GenericUDFElt) {

       // Elt is a special case because it can take variable number of arguments.
-      return getEltExpression(childExpr, returnType);
+      ve = getEltExpression(childExpr, returnType);
     } else if (udf instanceof GenericUDFBridge) {
-      VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode,
+      ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode,
           returnType);
-      if (v != null) {
-        return v;
-      }
     } else if (udf instanceof GenericUDFToDecimal) {
-      return getCastToDecimal(childExpr, returnType);
+      ve = getCastToDecimal(childExpr, returnType);
     } else if (udf instanceof GenericUDFToChar) {
-      return getCastToChar(childExpr, returnType);
+      ve = getCastToChar(childExpr, returnType);
     } else if (udf instanceof GenericUDFToVarchar) {
-      return getCastToVarChar(childExpr, returnType);
+      ve = getCastToVarChar(childExpr, returnType);
     } else if (udf instanceof GenericUDFTimestamp) {
-      return getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType);
+      ve = getCastToTimestamp((GenericUDFTimestamp)udf, childExpr, mode, returnType);
+    }
+    if (ve != null) {
+      return ve;
     }

     // Now do a general lookup
     Class udfClass = udf.getClass();
@@ -1252,13 +1259,9 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
       isSubstituted = true;
     }

-    VectorExpression ve = getVectorExpressionForUdf((!isSubstituted ? udf : null),
+    ve = getVectorExpressionForUdf((!isSubstituted ? udf : null),
         udfClass, castedChildren, mode, returnType);

-    if (ve == null) {
-      throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
-    }
-
     return ve;
   }

@@ -1623,16 +1626,20 @@ private VectorExpression getInExpression(List childExpr, Mode mode

   private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf,
       List childExpr, Mode mode, TypeInfo returnType) throws HiveException {
     Class cl = udf.getUdfClass();
+    VectorExpression ve = null;
     if (isCastToIntFamily(cl)) {
-      return getCastToLongExpression(childExpr);
+      ve = getCastToLongExpression(childExpr);
     } else if (cl.equals(UDFToBoolean.class)) {
-      return getCastToBoolean(childExpr);
+      ve = getCastToBoolean(childExpr);
     } else if (isCastToFloatFamily(cl)) {
-      return getCastToDoubleExpression(cl, childExpr, returnType);
+      ve = getCastToDoubleExpression(cl, childExpr, returnType);
     } else if (cl.equals(UDFToString.class)) {
-      return getCastToString(childExpr, returnType);
+      ve = getCastToString(childExpr, returnType);
     }
-    return null;
+    if (ve == null && childExpr instanceof ExprNodeGenericFuncDesc) {
+      ve = getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr);
+    }
+    return ve;
   }

   private HiveDecimal castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException {
@@ -1762,10 +1769,10 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo
           returnType);
     } else if (isStringFamily(inputType)) {
       return createVectorExpression(CastStringToDecimal.class, childExpr, Mode.PROJECTION, returnType);
-    } else if (isDatetimeFamily(inputType)) {
+    } else if (inputType.equals("timestamp")) {
       return createVectorExpression(CastTimestampToDecimal.class, childExpr, Mode.PROJECTION, returnType);
     }
-    throw new HiveException("Unhandled cast input type: " + inputType);
+    return null;
   }

   private VectorExpression getCastToString(List childExpr, TypeInfo returnType)
@@ -1790,11 +1797,7 @@ private VectorExpression getCastToString(List childExpr, TypeInfo
     } else if (isStringFamily(inputType)) {
       return createVectorExpression(CastStringGroupToString.class, childExpr, Mode.PROJECTION, returnType);
     }
-    /* The string type is deliberately omitted -- the planner removes string to string casts.
-     * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
-     */
-
-    throw new HiveException("Unhandled cast input type: " + inputType);
+    return null;
   }

   private VectorExpression getCastToChar(List childExpr, TypeInfo returnType)
@@ -1818,12 +1821,7 @@ private VectorExpression getCastToChar(List childExpr, TypeInfo re
     } else if (isStringFamily(inputType)) {
       return createVectorExpression(CastStringGroupToChar.class, childExpr, Mode.PROJECTION, returnType);
     }
-
-    /*
-     * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
-     */
-
-    throw new HiveException("Unhandled cast input type: " + inputType);
+    return null;
   }

   private VectorExpression getCastToVarChar(List childExpr, TypeInfo returnType)
@@ -1847,12 +1845,7 @@ private VectorExpression getCastToVarChar(List childExpr, TypeInfo
     } else if (isStringFamily(inputType)) {
       return createVectorExpression(CastStringGroupToVarChar.class, childExpr, Mode.PROJECTION, returnType);
     }
-
-    /*
-     * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
-     */
-
-    throw new HiveException("Unhandled cast input type: " + inputType);
+    return null;
  }

   private VectorExpression getCastToDoubleExpression(Class udf, List childExpr,
@@ -1875,8 +1868,6 @@ private VectorExpression getCastToDoubleExpression(Class udf, List childExpr)
       ocm.freeOutputColumn(lenExpr.getOutputColumn());
       return lenToBoolExpr;
     }
-    // cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op
-
     return null;
   }

@@ -1926,8 +1915,6 @@ private VectorExpression getCastToLongExpression(List childExpr)
       // integer and boolean types require no conversion, so use a no-op
       return getIdentityExpression(childExpr);
     }
-    // string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
-
     return null;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
index 9e0159c..aef46da 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
@@ -135,6 +135,12 @@ public void setChildren(List children) {
   public String toString() {
     StringBuilder sb = new StringBuilder();
     sb.append(genericUDF.getClass().getSimpleName());
+    if (genericUDF instanceof GenericUDFBridge) {
+      GenericUDFBridge genericUDFBridge = (GenericUDFBridge) genericUDF;
+      sb.append(" ==> ");
+      sb.append(genericUDFBridge.getUdfName());
+      sb.append(" ");
+    }
     sb.append("(");
     if (chidren != null) {
       for (int i = 0; i < chidren.size(); i++) {
diff --git ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out
index 54bad12..bcf1ab6 100644
--- ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out
+++ ql/src/test/results/clientpositive/tez/vector_decimal_udf.q.out
@@ -2156,6 +2156,7 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized

   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out
index b7ddf73..de8ce7f 100644
--- ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out
+++ ql/src/test/results/clientpositive/tez/vector_decimal_udf2.q.out
@@ -145,6 +145,7 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized

   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/vector_between_columns.q.out ql/src/test/results/clientpositive/vector_between_columns.q.out
index a4e8d64..6f7607c 100644
--- ql/src/test/results/clientpositive/vector_between_columns.q.out
+++ ql/src/test/results/clientpositive/vector_between_columns.q.out
@@ -134,6 +134,7 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
       Local Work:
         Map Reduce Local Work

@@ -155,7 +156,28 @@ POSTHOOK: Input: default@tint
 POSTHOOK: Input: default@tsint
 #### A masked pattern was here ####
 tint.rnum	tsint.rnum
+0	0
+0	1
+0	2
+0	3
+0	4
+1	0
 1	1
+1	2
+1	3
+1	4
+2	0
+2	1
 2	2
+2	3
+2	4
+3	0
+3	1
+3	2
 3	3
+3	4
+4	0
+4	1
+4	2
+4	3
 4	4
diff --git ql/src/test/results/clientpositive/vector_decimal_udf.q.out ql/src/test/results/clientpositive/vector_decimal_udf.q.out
index 9dea502..b99fd10 100644
--- ql/src/test/results/clientpositive/vector_decimal_udf.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_udf.q.out
@@ -2085,6 +2085,7 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized

   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
index 805584a..4e24fa6 100644
--- ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
+++ ql/src/test/results/clientpositive/vector_decimal_udf2.q.out
@@ -139,6 +139,7 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized

   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/vector_udf1.q.out ql/src/test/results/clientpositive/vector_udf1.q.out
index bb02ea7..232d78e 100644
--- ql/src/test/results/clientpositive/vector_udf1.q.out
+++ ql/src/test/results/clientpositive/vector_udf1.q.out
@@ -766,6 +766,7 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized

   Stage: Stage-0
     Fetch Operator