diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 6197728..9ec3c37 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -293,7 +293,20 @@ public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) { || arg0Type(expr).equals("float"))) { return true; } - } else if (gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) { + } else if (gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string") + + /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because + * of their complexity and generality. In the future, variations of these + * can be optimized to run faster for the vectorized code path. For example, + * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END + * is an example of a GenericUDFCase that has all constant arguments + * except for the first argument. This is probably a common case and a + * good candidate for a fast, special-purpose VectorExpression. Then + * the UDF Adaptor code path could be used as a catch-all for + * non-optimized general cases. + */ + || gudf instanceof GenericUDFCase + || gudf instanceof GenericUDFWhen) { return true; } return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index bca1f26..a2f050c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -123,6 +123,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCeil; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFloor; @@ -153,6 +154,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; public class Vectorizer implements PhysicalPlanResolver { @@ -250,6 +252,8 @@ public Vectorizer() { supportedGenericUDFs.add(GenericUDFAbs.class); supportedGenericUDFs.add(GenericUDFBetween.class); supportedGenericUDFs.add(GenericUDFIn.class); + supportedGenericUDFs.add(GenericUDFCase.class); + supportedGenericUDFs.add(GenericUDFWhen.class); // For type casts supportedGenericUDFs.add(UDFToLong.class); diff --git a/ql/src/test/queries/clientpositive/vectorized_case.q b/ql/src/test/queries/clientpositive/vectorized_case.q new file mode 100644 index 0000000..e448d51 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vectorized_case.q @@ -0,0 +1,37 @@ +set hive.vectorized.execution.enabled = true +; +explain +select + csmallint, + case + when csmallint = 418 then "a" + when csmallint = 12205 then "b" + else "c" + end, + case csmallint + when 418 then "a" + when 12205 then "b" + else "c" + end +from alltypesorc +where csmallint = 418 +or csmallint = 12205 +or csmallint = 10583 +; +select + csmallint, + case + when csmallint = 418 then "a" + when csmallint = 12205 then "b" + else "c" + end, + case csmallint + when 418 then "a" + when 12205 then "b" + else "c" + end +from alltypesorc +where csmallint = 418 +or csmallint = 12205 +or csmallint = 10583 +;