diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index b84d4b4..6ee1b07 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -187,6 +187,7 @@ minitez.query.files=bucket_map_join_tez1.q,\ tez_schema_evolution.q,\ tez_union.q,\ tez_union_decimal.q,\ + tez_union_group_by.q,\ vectorized_dynamic_partition_pruning.q beeline.positive.exclude=add_part_exist.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java index bec3cb7..2bd40fa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java @@ -46,6 +46,9 @@ public static Set findOperators(Collection> starts, Class clazz) { Set found = new HashSet(); for (Operator start : starts) { + if (start == null) { + continue; + } findOperators(start, clazz, found); } return found; diff --git ql/src/test/queries/clientpositive/tez_union_group_by.q ql/src/test/queries/clientpositive/tez_union_group_by.q new file mode 100644 index 0000000..56e8583 --- /dev/null +++ ql/src/test/queries/clientpositive/tez_union_group_by.q @@ -0,0 +1,87 @@ +CREATE TABLE x +( +u bigint, +t string, +st string +) +PARTITIONED BY (date string) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB"); + +CREATE TABLE y +( +u bigint +) +PARTITIONED BY (date string) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB"); + +CREATE TABLE z +( +u bigint +) +PARTITIONED BY (date string) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB"); + +CREATE TABLE v +( +t string, +st string, +id int +) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB"); + +EXPLAIN +SELECT o.u, n.u +FROM +( +SELECT m.u, Min(date) as ft +FROM +( +SELECT u, date FROM x WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM y WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM z WHERE date < '2014-09-02' +) m +GROUP BY m.u +) n +LEFT OUTER JOIN +( +SELECT x.u +FROM x +JOIN v +ON (x.t = v.t AND x.st <=> v.st) +WHERE x.date >= '2014-03-04' AND x.date < '2014-09-03' +GROUP BY x.u +) o +ON n.u = o.u +WHERE n.u <> 0 AND n.ft <= '2014-09-02'; + +SELECT o.u, n.u +FROM +( +SELECT m.u, Min(date) as ft +FROM +( +SELECT u, date FROM x WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM y WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM z WHERE date < '2014-09-02' +) m +GROUP BY m.u +) n +LEFT OUTER JOIN +( +SELECT x.u +FROM x +JOIN v +ON (x.t = v.t AND x.st <=> v.st) +WHERE x.date >= '2014-03-04' AND x.date < '2014-09-03' +GROUP BY x.u +) o +ON n.u = o.u +WHERE n.u <> 0 AND n.ft <= '2014-09-02'; diff --git ql/src/test/results/clientpositive/tez/tez_union_group_by.q.out ql/src/test/results/clientpositive/tez/tez_union_group_by.q.out new file mode 100644 index 0000000..b2a61a6 --- /dev/null +++ ql/src/test/results/clientpositive/tez/tez_union_group_by.q.out @@ -0,0 +1,327 @@ +PREHOOK: query: CREATE TABLE x +( +u bigint, +t string, +st string +) +PARTITIONED BY (date string) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@x +POSTHOOK: query: CREATE TABLE x +( +u bigint, +t string, +st string +) +PARTITIONED BY (date string) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@x +PREHOOK: query: CREATE TABLE y +( +u bigint +) +PARTITIONED BY (date string) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@y +POSTHOOK: query: CREATE TABLE y +( +u bigint +) +PARTITIONED BY (date string) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@y +PREHOOK: query: CREATE TABLE z +( +u bigint +) +PARTITIONED BY (date string) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@z +POSTHOOK: query: CREATE TABLE z +( +u bigint +) +PARTITIONED BY (date string) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@z +PREHOOK: query: CREATE TABLE v +( +t string, +st string, +id int +) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@v +POSTHOOK: query: CREATE TABLE v +( +t string, +st string, +id int +) +STORED AS ORC +TBLPROPERTIES ("orc.compress"="ZLIB") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@v +PREHOOK: query: EXPLAIN +SELECT o.u, n.u +FROM +( +SELECT m.u, Min(date) as ft +FROM +( +SELECT u, date FROM x WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM y WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM z WHERE date < '2014-09-02' +) m +GROUP BY m.u +) n +LEFT OUTER JOIN +( +SELECT x.u +FROM x +JOIN v +ON (x.t = v.t AND x.st <=> v.st) +WHERE x.date >= '2014-03-04' AND x.date < '2014-09-03' +GROUP BY x.u +) o +ON n.u = o.u +WHERE n.u <> 0 AND n.ft <= '2014-09-02' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT o.u, n.u +FROM +( +SELECT m.u, Min(date) as ft +FROM +( +SELECT u, date FROM x WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM y WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM z WHERE date < '2014-09-02' +) m +GROUP BY m.u +) n +LEFT OUTER JOIN +( +SELECT x.u +FROM x +JOIN v +ON (x.t = v.t AND x.st <=> v.st) +WHERE x.date >= '2014-03-04' AND x.date < '2014-09-03' +GROUP BY x.u +) o +ON n.u = o.u +WHERE n.u <> 0 AND n.ft <= '2014-09-02' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 10 <- Union 7 (CONTAINS) + Map 6 <- Union 7 (CONTAINS) + Map 9 <- Union 7 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 8 <- Union 7 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: v + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: t is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: t (type: string), st (type: string) + sort order: ++ + Map-reduce partition columns: t (type: string), st (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 10 + Map 5 + Map 6 + Map 9 + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + nullSafes: [false, true] + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col2 (type: bigint), _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (_col1 <= '2014-09-02') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Union 7 + Vertex: Union 7 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT o.u, n.u +FROM +( +SELECT m.u, Min(date) as ft +FROM +( +SELECT u, date FROM x WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM y WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM z WHERE date < '2014-09-02' +) m +GROUP BY m.u +) n +LEFT OUTER JOIN +( +SELECT x.u +FROM x +JOIN v +ON (x.t = v.t AND x.st <=> v.st) +WHERE x.date >= '2014-03-04' AND x.date < '2014-09-03' +GROUP BY x.u +) o +ON n.u = o.u +WHERE n.u <> 0 AND n.ft <= '2014-09-02' +PREHOOK: type: QUERY +PREHOOK: Input: default@v +PREHOOK: Input: default@x +PREHOOK: Input: default@y +PREHOOK: Input: default@z +#### A masked pattern was here #### +POSTHOOK: query: SELECT o.u, n.u +FROM +( +SELECT m.u, Min(date) as ft +FROM +( +SELECT u, date FROM x WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM y WHERE date < '2014-09-02' +UNION ALL +SELECT u, date FROM z WHERE date < '2014-09-02' +) m +GROUP BY m.u +) n +LEFT OUTER JOIN +( +SELECT x.u +FROM x +JOIN v +ON (x.t = v.t AND x.st <=> v.st) +WHERE x.date >= '2014-03-04' AND x.date < '2014-09-03' +GROUP BY x.u +) o +ON n.u = o.u +WHERE n.u <> 0 AND n.ft <= '2014-09-02' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@v +POSTHOOK: Input: default@x +POSTHOOK: Input: default@y +POSTHOOK: Input: default@z +#### A masked pattern was here ####