diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt index 9f4bb75..94372d6 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt @@ -92,10 +92,14 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar); + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); } } else { for(int i = 0; i != n; i++) { outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar); + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); } } } else /* there are nulls */ { diff --git ql/src/test/queries/clientpositive/vector_if_expr_2.q ql/src/test/queries/clientpositive/vector_if_expr_2.q new file mode 100644 index 0000000..d22eeb1 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_if_expr_2.q @@ -0,0 +1,19 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +drop table if exists foo; +create temporary table foo (x int, y int) stored as orc; +insert into foo values(1,1),(2,NULL),(3,1); + +-- Fix HIVE-17682 "Vectorization: IF stmt produces wrong results" (IfExprColumnScalar.txt) + +EXPLAIN VECTORIZATION EXPRESSION +select x, IF(x > 0,y,0) from foo order by x; + +select x, IF(x > 0,y,0) from foo order by x; + +SET hive.vectorized.execution.enabled=false; + +select x, IF(x > 0,y,0) from foo order by x; \ No newline at end of file diff --git ql/src/test/results/clientpositive/vector_if_expr_2.q.out ql/src/test/results/clientpositive/vector_if_expr_2.q.out new file mode 100644 index 0000000..e5cce45 --- /dev/null +++ ql/src/test/results/clientpositive/vector_if_expr_2.q.out @@ -0,0 +1,117 @@ +PREHOOK: query: drop table if exists foo +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists foo +POSTHOOK: type: DROPTABLE +PREHOOK: query: create temporary table foo (x int, y int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create temporary table foo (x int, y int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: insert into foo values(1,1),(2,NULL),(3,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@foo +POSTHOOK: query: insert into foo values(1,1),(2,NULL),(3,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.x EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: foo.y EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +select x, IF(x > 0,y,0) from foo order by x +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +select x, IF(x > 0,y,0) from foo order by x +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: foo + Statistics: Num rows: 1 Data size: 258 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: x (type: int), if((x > 0), y, 0) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3] + selectExpressions: IfExprLongColumnLongScalar(col 2, col 1, val 0)(children: LongColGreaterLongScalar(col 0, val 0) -> 2:long) -> 3:long + Statistics: Num rows: 1 Data size: 258 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 258 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 258 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select x, IF(x > 0,y,0) from foo order by x +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select x, IF(x > 0,y,0) from foo order by x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +1 1 +2 NULL +3 1 +PREHOOK: query: select x, IF(x > 0,y,0) from foo order by x +PREHOOK: type: QUERY +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: select x, IF(x > 0,y,0) from foo order by x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +1 1 +2 NULL +3 1