diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index f184b8d..3669c91 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1461,6 +1461,31 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, ve = getBetweenFilterExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { ve = getInExpression(childExpr, mode, returnType); + } else if (udf instanceof GenericUDFWhen && + mode == VectorExpressionDescriptor.Mode.PROJECTION && + childExpr.size() == 3) { + /* + * When we have 2 simple values: + * CASE WHEN boolExpr THEN column | const ELSE column | const END + * then we can convert to: IF (boolExpr THEN column | const ELSE column | const) + */ + // CONSIDER: Adding a version of IfExpr* than can handle a non-column/const expression in the + // THEN or ELSE. + ExprNodeDesc exprNodeDesc1 = childExpr.get(1); + ExprNodeDesc exprNodeDesc2 = childExpr.get(2); + if ((exprNodeDesc1 instanceof ExprNodeConstantDesc || + exprNodeDesc1 instanceof ExprNodeColumnDesc) && + (exprNodeDesc2 instanceof ExprNodeConstantDesc || + exprNodeDesc2 instanceof ExprNodeColumnDesc)) { + // Yes. + GenericUDFIf genericUDFIf = new GenericUDFIf(); + ve = getVectorExpressionForUdf( + genericUDFIf, + GenericUDFIf.class, + castedChildren, + mode, + returnType); + } } else if (udf instanceof GenericUDFOPPositive) { ve = getIdentityExpression(childExpr); } else if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl) { diff --git ql/src/test/queries/clientpositive/vectorized_case.q ql/src/test/queries/clientpositive/vectorized_case.q index 2efacb4..2f6810f 100644 --- ql/src/test/queries/clientpositive/vectorized_case.q +++ ql/src/test/queries/clientpositive/vectorized_case.q @@ -55,3 +55,21 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 ; +explain vectorization expression +select + sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, + sum(case when cint % 2 = 1 then 1 else 0 end) as codd +from alltypesorc; +select + sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, + sum(case when cint % 2 = 1 then 1 else 0 end) as codd +from alltypesorc; +explain vectorization expression +select + sum(case when cint % 2 = 0 then cint else 0 end) as ceven, + sum(case when cint % 2 = 1 then cint else 0 end) as codd +from alltypesorc; +select + sum(case when cint % 2 = 0 then cint else 0 end) as ceven, + sum(case when cint % 2 = 1 then cint else 0 end) as codd +from alltypesorc; diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index b58e707..b4703be 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -240,3 +240,259 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain vectorization expression +select + sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, + sum(case when cint % 2 = 1 then 1 else 0 end) as codd +from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select + sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, + sum(case when cint % 2 = 1 then 1 else 0 end) as codd +from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13] + selectExpressions: IfExprLongScalarLongScalar(col 13, val 1, val 0)(children: LongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 2, val 2) -> 12:long) -> 13:long) -> 12:long, IfExprLongScalarLongScalar(col 14, val 1, val 0)(children: LongColEqualLongScalar(col 13, val 1)(children: LongColModuloLongScalar(col 2, val 2) -> 13:long) -> 14:long) -> 13:long + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 12) -> bigint, VectorUDAFSumLong(col 13) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, + sum(case when cint % 2 = 1 then 1 else 0 end) as codd +from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, + sum(case when cint % 2 = 1 then 1 else 0 end) as codd +from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5110 4607 +PREHOOK: query: explain vectorization expression +select + sum(case when cint % 2 = 0 then cint else 0 end) as ceven, + sum(case when cint % 2 = 1 then cint else 0 end) as codd +from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select + sum(case when cint % 2 = 0 then cint else 0 end) as ceven, + sum(case when cint % 2 = 1 then cint else 0 end) as codd +from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13] + selectExpressions: IfExprLongColumnLongScalar(col 13, col 2, val 0)(children: LongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 2, val 2) -> 12:long) -> 13:long) -> 12:long, IfExprLongColumnLongScalar(col 14, col 2, val 0)(children: LongColEqualLongScalar(col 13, val 1)(children: LongColModuloLongScalar(col 2, val 2) -> 13:long) -> 14:long) -> 13:long + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 12) -> bigint, VectorUDAFSumLong(col 13) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1] + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + sum(case when cint % 2 = 0 then cint else 0 end) as ceven, + sum(case when cint % 2 = 1 then cint else 0 end) as codd +from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + sum(case when cint % 2 = 0 then cint else 0 end) as ceven, + sum(case when cint % 2 = 1 then cint else 0 end) as codd +from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +248718130534 1995744891643