diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 27ece51..bedb552 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -285,6 +285,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_char_simple.q,\ vector_coalesce.q,\ vector_coalesce_2.q,\ + vector_coalesce_3.q,\ vector_complex_all.q,\ vector_complex_join.q,\ vector_count.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index f088941..d9acdf5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2169,20 +2169,25 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Ve for (int i = 0; i < childExprList.size(); i++) { ExprNodeDesc child = childExprList.get(i); + /* + UNDONE: Until we fix scratch column allocation to not release after each expression, we + UNDONE: cannot have anything other than a column or constant in the parameter list. if (child instanceof ExprNodeGenericFuncDesc) { VectorExpression e = getVectorExpression(child, VectorExpressionDescriptor.Mode.PROJECTION); vectorExprs.add(e); variableArgPositions.add(i); exprResultColumnNums.add(e.getOutputColumn()); argDescs[i].setVariable(e.getOutputColumn()); - } else if (child instanceof ExprNodeColumnDesc) { + } else + */ + if (child instanceof ExprNodeColumnDesc) { variableArgPositions.add(i); argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn())); } else if (child instanceof ExprNodeConstantDesc) { // this is a constant (or null) argDescs[i].setConstant((ExprNodeConstantDesc) child); } else { - throw new HiveException("Unable to vectorize custom UDF. 
Encountered unsupported expr desc : " + throw new HiveException("Unable to use the VectorUDFAdaptor. Encountered unsupported expr desc : " + child); } } diff --git ql/src/test/queries/clientpositive/vector_coalesce_3.q ql/src/test/queries/clientpositive/vector_coalesce_3.q new file mode 100644 index 0000000..e3d9f0a --- /dev/null +++ ql/src/test/queries/clientpositive/vector_coalesce_3.q @@ -0,0 +1,19 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +SET hive.auto.convert.join=true; + +CREATE TABLE test_1 (member BIGINT, attr BIGINT) STORED AS ORC; + +CREATE TABLE test_2 (member BIGINT) STORED AS ORC; + +INSERT INTO test_1 VALUES (3,1),(2,2); +INSERT INTO test_2 VALUES (1),(2),(3),(4); + +EXPLAIN +SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member; + +SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member; diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out new file mode 100644 index 0000000..1a71985 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out @@ -0,0 +1,127 @@ +PREHOOK: query: CREATE TABLE test_1 (member BIGINT, attr BIGINT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_1 +POSTHOOK: query: CREATE TABLE test_1 (member BIGINT, attr BIGINT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_1 +PREHOOK: query: CREATE TABLE test_2 (member BIGINT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_2 +POSTHOOK: query: CREATE TABLE test_2 (member BIGINT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@test_2 +PREHOOK: query: INSERT INTO test_1 VALUES (3,1),(2,2) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@test_1 +POSTHOOK: query: INSERT INTO test_1 VALUES (3,1),(2,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@test_1 +POSTHOOK: Lineage: test_1.attr EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: test_1.member EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT INTO test_2 VALUES (1),(2),(3),(4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@test_2 +POSTHOOK: query: INSERT INTO test_2 VALUES (1),(2),(3),(4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@test_2 +POSTHOOK: Lineage: test_2.member EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN +SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: m + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: member (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 
4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: n + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: member (type: bigint), attr (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT m.member, (CASE WHEN COALESCE(n.attr, 5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +PREHOOK: Input: default@test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT m.member, (CASE WHEN COALESCE(n.attr, 
5)>1 THEN n.attr END) AS attr +FROM test_2 m LEFT JOIN test_1 n ON m.member = n.member +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +POSTHOOK: Input: default@test_2 +#### A masked pattern was here #### +1 NULL +2 2 +3 NULL +4 NULL