diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 6197728..9ec3c37 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -293,7 +293,20 @@ public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) { || arg0Type(expr).equals("float"))) { return true; } - } else if (gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) { + } else if (gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string") + + /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because + * of their complexity and generality. In the future, variations of these + * can be optimized to run faster for the vectorized code path. For example, + * CASE col WHEN 1 THEN "one" WHEN 2 THEN "two" ELSE "other" END + * is an example of a GenericUDFCase that has all constant arguments + * except for the first argument. This is probably a common case and a + * good candidate for a fast, special-purpose VectorExpression. Then + * the UDF Adaptor code path could be used as a catch-all for + * non-optimized general cases. 
+ */ + || gudf instanceof GenericUDFCase + || gudf instanceof GenericUDFWhen) { return true; } return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 5c7617e..dad6a72 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -123,6 +123,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCeil; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFloor; @@ -152,6 +153,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; public class Vectorizer implements PhysicalPlanResolver { @@ -249,6 +251,8 @@ public Vectorizer() { supportedGenericUDFs.add(GenericUDFAbs.class); supportedGenericUDFs.add(GenericUDFBetween.class); supportedGenericUDFs.add(GenericUDFIn.class); + supportedGenericUDFs.add(GenericUDFCase.class); + supportedGenericUDFs.add(GenericUDFWhen.class); // For type casts supportedGenericUDFs.add(UDFToLong.class); @@ -347,17 +351,17 @@ private void vectorizeMRTask(MapRedTask mrTask) throws SemanticException { topNodes.addAll(mapWork.getAliasToWork().values()); HashMap nodeOutput = new HashMap(); ogw.startWalking(topNodes, nodeOutput); - + Map> columnVectorTypes = vnp.getScratchColumnVectorTypes(); mapWork.setScratchColumnVectorTypes(columnVectorTypes); Map> columnMap = vnp.getScratchColumnMap(); 
mapWork.setScratchColumnMap(columnMap); - + if (LOG.isDebugEnabled()) { LOG.debug(String.format("vectorTypes: %s", columnVectorTypes.toString())); LOG.debug(String.format("columnMap: %s", columnMap.toString())); } - + return; } } @@ -426,9 +430,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { Operator op = (Operator) nd; - - VectorizationContext vContext = null; - + + VectorizationContext vContext = null; + if (op instanceof TableScanOperator) { vContext = getVectorizationContext(op, physicalContext); for (String onefile : mWork.getPathToAliases().keySet()) { @@ -458,9 +462,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, --i; } } - + assert vContext != null; - + if (op.getType().equals(OperatorType.REDUCESINK) && op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) { // No need to vectorize diff --git a/ql/src/test/queries/clientpositive/vectorized_case.q b/ql/src/test/queries/clientpositive/vectorized_case.q new file mode 100644 index 0000000..e448d51 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vectorized_case.q @@ -0,0 +1,37 @@ +set hive.vectorized.execution.enabled = true +; +explain +select + csmallint, + case + when csmallint = 418 then "a" + when csmallint = 12205 then "b" + else "c" + end, + case csmallint + when 418 then "a" + when 12205 then "b" + else "c" + end +from alltypesorc +where csmallint = 418 +or csmallint = 12205 +or csmallint = 10583 +; +select + csmallint, + case + when csmallint = 418 then "a" + when csmallint = 12205 then "b" + else "c" + end, + case csmallint + when 418 then "a" + when 12205 then "b" + else "c" + end +from alltypesorc +where csmallint = 418 +or csmallint = 12205 +or csmallint = 10583 +;