diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index dcf9d9c1a4..cd8b430047 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -339,6 +339,7 @@ minitez.query.files=bucket_map_join_tez1.q,\ tez_union_view.q,\ tez_union_decimal.q,\ tez_union_group_by.q,\ + tez_union_udtf.q,\ tez_smb_main.q,\ tez_smb_1.q,\ vectorized_dynamic_partition_pruning.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index 11c1df6806..be29886a26 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -293,7 +293,11 @@ public void removeUnionOperators(Configuration conf, GenTezProcContext context, FileSinkOperator fileSink = (FileSinkOperator)current; // remember it for additional processing later - context.fileSinkSet.add(fileSink); + if (context.fileSinkSet.contains(fileSink)) { + continue; + } else { + context.fileSinkSet.add(fileSink); + } FileSinkDesc desc = fileSink.getConf(); Path path = desc.getDirName(); diff --git a/ql/src/test/queries/clientpositive/tez_union_udtf.q b/ql/src/test/queries/clientpositive/tez_union_udtf.q new file mode 100644 index 0000000000..5453398c36 --- /dev/null +++ b/ql/src/test/queries/clientpositive/tez_union_udtf.q @@ -0,0 +1,19 @@ +-- SORT_BEFORE_DIFF +-- union case: both subqueries are map jobs and one of them consists of udtf + +EXPLAIN +CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238'; + +CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238'; + +SELECT * FROM x; diff --git a/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out new file mode 100644 index 0000000000..396677dfd0 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out @@ -0,0 +1,140 @@ +PREHOOK: query: -- SORT_BEFORE_DIFF +-- union case: both subqueries are map jobs and one of them consists of udtf + +EXPLAIN +CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238' +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: -- SORT_BEFORE_DIFF +-- union case: both subqueries are map jobs and one of them consists of udtf + +EXPLAIN +CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238' +POSTHOOK: type: CREATETABLE_AS_SELECT +Plan not optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Union 2 (CONTAINS) +Map 3 <- Union 2 (CONTAINS) + +Stage-3 + Stats-Aggr Operator + Stage-4 + Create Table Operator: + columns:["key string","tag int"] + input format:org.apache.hadoop.mapred.TextInputFormat + name:default.x + output format:org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + Stage-2 + Dependency Collection{} + Stage-1 + Union 2 + |<-Map 3 [CONTAINS] + | File Output Operator [FS_13] + | compressed:false + | Statistics:Num rows: 274 Data size: 2838 Basic stats: COMPLETE Column stats: NONE + | table:{"name:":"default.x","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + | Select Operator [SEL_12] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 274 Data size: 2838 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 24 Data size: 182 Basic stats: COMPLETE Column stats: NONE + | Lateral View Join Operator [LVJ_8] + | outputColumnNames:["_col5"] + | Statistics:Num rows: 24 Data size: 182 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_5] + | Statistics:Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + | Lateral View Forward [LVF_4] + | Statistics:Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_15] + | predicate:(key = '238') (type: boolean) + | Statistics:Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_3] + | alias:src1 + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | File Output Operator [FS_13] + | compressed:false + | Statistics:Num rows: 274 Data size: 2838 Basic stats: COMPLETE Column stats: NONE + | table:{"name:":"default.x","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + | Select Operator [SEL_12] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 274 Data size: 2838 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_10] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 24 Data size: 182 Basic stats: COMPLETE Column stats: NONE + | Lateral View Join Operator [LVJ_8] + | outputColumnNames:["_col5"] + | Statistics:Num rows: 24 Data size: 182 Basic stats: COMPLETE Column stats: NONE + | UDTF Operator [UDTF_7] + | function name:explode + | Statistics:Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_6] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + | Please refer to the previous Lateral View Forward [LVF_4] + |<-Map 1 [CONTAINS] + File Output Operator [FS_13] + compressed:false + Statistics:Num rows: 274 Data size: 2838 Basic stats: COMPLETE Column stats: NONE + table:{"name:":"default.x","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + Select Operator [SEL_12] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 274 Data size: 2838 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_2] + outputColumnNames:["_col1"] + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_14] + predicate:(key = '238') (type: boolean) + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_0] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Stage-0 + Move Operator + Please refer to the previous Stage-1 + +PREHOOK: query: CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238' +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: CREATE TABLE x AS + SELECT key, 1 as tag FROM src WHERE key = '238' + UNION ALL + SELECT key, tag FROM src1 + LATERAL VIEW EXPLODE(array(2)) tf as tag + WHERE key = '238' +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +POSTHOOK: Lineage: x.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: x.tag EXPRESSION [(src1)src1.FieldSchema(name:ROW__ID, type:struct, comment:), ] +PREHOOK: query: SELECT * FROM x +PREHOOK: type: QUERY +PREHOOK: Input: default@x +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@x +#### A masked pattern was here #### +238 1 +238 1 +238 2