diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 3a5aec7..946bcf8 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -753,6 +753,7 @@ minillaplocal.query.files=\ vector_acid4.q,\ vector_annotate_stats_select.q,\ vector_auto_smb_mapjoin_14.q,\ + vector_case_when_no_conversion.q,\ vector_char_varchar_1.q,\ vector_complex_all.q,\ vector_complex_join.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 55d2a16..89be0ab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -3602,17 +3602,28 @@ private VectorExpression getWhenExpression(List childExpr, return getIfExpression(genericUDFIf, ifChildExpr, mode, returnType); } + // FUTURE: Too restrictive when THEN/ELSE and result types are both in the string group? + private boolean isEquivalentTypeInfo(TypeInfo typeInfo1, TypeInfo typeInfo2) { + if (typeInfo1 instanceof DecimalTypeInfo && typeInfo2 instanceof DecimalTypeInfo) { + + // Assignment to DecimalColumnVector enforces target precision/scale. + return true; + } + return typeInfo1.equals(typeInfo2); + } + /* * Return vector expression for a custom (i.e. not built-in) UDF. */ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, VectorExpressionDescriptor.Mode mode) throws HiveException { + TypeInfo resultTypeInfo = expr.getTypeInfo(); + boolean isFilter = false; // Assume. if (mode == VectorExpressionDescriptor.Mode.FILTER) { // Is output type a BOOLEAN? - TypeInfo resultTypeInfo = expr.getTypeInfo(); if (resultTypeInfo.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) resultTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) { isFilter = true; @@ -3625,6 +3636,24 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Ve List childExprList = expr.getChildren(); final int childrenCount = childExprList.size(); + final GenericUDF genericUDF = expr.getGenericUDF(); + if (genericUDF instanceof GenericUDFWhen) { + + // Currently, we do not convert THEN/ELSE expressions to the result type. + for (int i = 1; i < childrenCount; i += 2) { + ExprNodeDesc child = childExprList.get(i); + TypeInfo childTypeInfo = child.getTypeInfo(); + if (!isEquivalentTypeInfo(childTypeInfo, resultTypeInfo)) { + throw new HiveException( + "Unable to vectorize CASE WHEN expression -- data type " + + childTypeInfo.getTypeName() + + " of THEN/ELSE expression is different than the result type " + + resultTypeInfo.getTypeName() + + ". Conversion is not supported"); + } + } + } + // argument descriptors VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[childrenCount]; for (int i = 0; i < argDescs.length; i++) { @@ -3690,7 +3719,6 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Ve } // Allocate output column and get column number; - TypeInfo resultTypeInfo = expr.getTypeInfo(); String resultTypeName = resultTypeInfo.getTypeName(); final int outputColumnNum = ocm.allocateOutputColumn(expr.getTypeInfo()); diff --git ql/src/test/queries/clientpositive/vector_case_when_no_conversion.q ql/src/test/queries/clientpositive/vector_case_when_no_conversion.q new file mode 100644 index 0000000..25eab64 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_case_when_no_conversion.q @@ -0,0 +1,35 @@ +--! qt:dataset:alltypesorc +set hive.stats.fetch.column.stats=true; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +-- SORT_QUERY_RESULTS + +EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, + case + when (cdouble is not null) then cdouble + when (cstring1 is not null) then cstring1 + when (cint is not null) then cint + when (cfloat is not null) then cfloat + when (csmallint is not null) then csmallint + else null + end as c +FROM alltypesorc +WHERE (cdouble IS NULL) +ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c +LIMIT 10; + +SELECT cdouble, cstring1, cint, cfloat, csmallint, + case + when (cdouble is not null) then cdouble + when (cstring1 is not null) then cstring1 + when (cint is not null) then cint + when (cfloat is not null) then cfloat + when (csmallint is not null) then csmallint + else null + end as c +FROM alltypesorc +WHERE (cdouble IS NULL) +ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c +LIMIT 10; diff --git ql/src/test/results/clientpositive/llap/vector_case_when_no_conversion.q.out ql/src/test/results/clientpositive/llap/vector_case_when_no_conversion.q.out new file mode 100644 index 0000000..b837018 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_case_when_no_conversion.q.out @@ -0,0 +1,122 @@ +PREHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, + case + when (cdouble is not null) then cdouble + when (cstring1 is not null) then cstring1 + when (cint is not null) then cint + when (cfloat is not null) then cfloat + when (csmallint is not null) then csmallint + else null + end as c +FROM alltypesorc +WHERE (cdouble IS NULL) +ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c +LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint, + case + when (cdouble is not null) then cdouble + when (cstring1 is not null) then cstring1 + when (cint is not null) then cint + when (cfloat is not null) then cfloat + when (csmallint is not null) then csmallint + else null + end as c +FROM alltypesorc +WHERE (cdouble IS NULL) +ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c +LIMIT 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: SELECT operator: Unable to vectorize CASE WHEN expression -- data type float of THEN/ELSE expression is different than the result type string. Conversion is not supported + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4] + Limit Vectorization: + className: VectorLimitOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5, 0, 1, 2, 3, 4] + selectExpressions: ConstantVectorExpression(val null) -> 5:double + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, + case + when (cdouble is not null) then cdouble + when (cstring1 is not null) then cstring1 + when (cint is not null) then cint + when (cfloat is not null) then cfloat + when (csmallint is not null) then csmallint + else null + end as c +FROM alltypesorc +WHERE (cdouble IS NULL) +ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c +LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT cdouble, cstring1, cint, cfloat, csmallint, + case + when (cdouble is not null) then cdouble + when (cstring1 is not null) then cstring1 + when (cint is not null) then cint + when (cfloat is not null) then cfloat + when (csmallint is not null) then csmallint + else null + end as c +FROM alltypesorc +WHERE (cdouble IS NULL) +ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c +LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +NULL 00MmJs1fiJp37y60mj4Ej8 -698191930 -51.0 NULL 00MmJs1fiJp37y60mj4Ej8 +NULL 00PafC7v 349566607 -51.0 NULL 00PafC7v +NULL 00iT08 284688862 -51.0 NULL 00iT08 +NULL 00k3yt70n476d6UQA -391432229 8.0 NULL 00k3yt70n476d6UQA +NULL 014ILGhXxNY7g02hl0Xw 633097881 11.0 NULL 014ILGhXxNY7g02hl0Xw +NULL 02VRbSC5I 551634127 8.0 NULL 02VRbSC5I +NULL 02k5poW73QsWM 891702124 11.0 NULL 02k5poW73QsWM +NULL 02v8WnLuYDos3Cq -648704945 8.0 NULL 02v8WnLuYDos3Cq +NULL 02vDyIVT752 388584379 11.0 NULL 02vDyIVT752 +NULL 0333uXvwB3ADRa4aP1h 336245146 8.0 NULL 0333uXvwB3ADRa4aP1h