diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 18a55cb..d937149 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1743,7 +1743,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc // 5. Build Hive Table Scan Rel tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, null == tableAlias ? tabMetaData.getTableName() : tableAlias, - getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf, + getQualifiedAlias(tableAlias, qb), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP), qb.isInsideView() || qb.getAliasInsideView().contains(tableAlias.toLowerCase())); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index ace0e9c..d514ee1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -9773,6 +9773,10 @@ private ExprNodeDesc genSamplePredicate(TableSample ts, return equalsExpr; } + protected String getQualifiedAlias(String alias, QB qb) { + return (qb.getId() == null ? alias : qb.getId().replace(':', '.') + "." + alias).toLowerCase(); + } + protected String getAliasId(String alias, QB qb) { return (qb.getId() == null ? alias : qb.getId() + ":" + alias).toLowerCase(); } diff --git a/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q b/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q index 2077f8e..aec6147 100644 --- a/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q +++ b/ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q @@ -297,3 +297,51 @@ insert overwrite table dest2 select key, count(*) group by key; select * from dest1; select * from dest2; + +set hive.cbo.returnpath.hiveop=true; +-- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1; + +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1; + +-- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. 
+explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key; + +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key; + +set hive.cbo.returnpath.hiveop=false; diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index f5b4375..6c34620 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -153,7 +153,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: sq1:loc_orc + alias: sq1.loc_orc Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int) diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index 89bc7af..fbcb108 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -96,7 +96,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: subq1:a + alias: subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) @@ -122,7 +122,7 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: $f1 (type: bigint) TableScan - alias: subq1:b + alias: subq1.b Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) @@ -247,7 +247,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: subq2:subq1:a + alias: subq2.subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) @@ -271,7 +271,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE TableScan - alias: subq2:subq1:b + alias: subq2.subq1.b Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) @@ -417,7 +417,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src1:subq1:a + alias: src1.subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) @@ -443,7 +443,7 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: $f1 (type: bigint) TableScan - alias: src1:subq1:b + alias: src1.subq1.b Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE 
Filter Operator predicate: key is not null (type: boolean) @@ -488,7 +488,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2:subq2:a + alias: src2.subq2.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) @@ -514,7 +514,7 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: $f1 (type: bigint) TableScan - alias: src2:subq2:b + alias: src2.subq2.b Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) @@ -646,7 +646,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: subq1:a + alias: subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) @@ -743,7 +743,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: subq2:subq1:a + alias: subq2.subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) @@ -864,7 +864,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: subq2:subq1:a + alias: subq2.subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) @@ -975,7 +975,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: subq1:a + alias: subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) @@ -1065,7 +1065,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: subq1:a + alias: subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key + 1) is not null (type: boolean) @@ -1168,7 +1168,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: subq2:a + alias: subq2.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key + 1) is not null (type: boolean) @@ -1251,7 +1251,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: subq1:a + alias: subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) @@ -1340,7 +1340,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: subq1:a + alias: subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) @@ -1447,7 +1447,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: a:subq2:subq1:a + alias: a.subq2.subq1.a Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) diff --git a/ql/src/test/results/clientpositive/cbo_rp_cross_product_check_2.q.out b/ql/src/test/results/clientpositive/cbo_rp_cross_product_check_2.q.out index 432a75a..e19688a 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_cross_product_check_2.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_cross_product_check_2.q.out @@ -213,13 +213,13 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - od1:d1 + od1.d1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - od1:d1 + od1.d1 TableScan - alias: od1:d1 + alias: 
od1.d1 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -237,7 +237,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: od1:d2 + alias: od1.d2 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -348,13 +348,13 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - od1:d1 + od1.d1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - od1:d1 + od1.d1 TableScan - alias: od1:d1 + alias: od1.d1 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -369,7 +369,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: od1:d2 + alias: od1.d2 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE @@ -486,7 +486,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: ss:a + alias: ss.a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -623,13 +623,13 @@ STAGE PLANS: Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - od1:d1 + od1.d1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - od1:d1 + od1.d1 TableScan - alias: od1:d1 + alias: od1.d1 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -647,7 +647,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: od1:d2 + alias: od1.d2 Statistics: Num rows: 10 Data size: 96 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) diff --git a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out index 149383a..3f6f364 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out @@ -35,7 +35,7 @@ STAGE PLANS: Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c_int (type: int) TableScan - alias: cbo_t2:cbo_t2 + alias: cbo_t2.cbo_t2 Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) @@ -51,7 +51,7 @@ STAGE PLANS: Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c_int (type: int) TableScan - alias: cbo_t3:cbo_t3 + alias: cbo_t3.cbo_t3 Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -684,7 +684,7 @@ STAGE PLANS: Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c_int (type: int) TableScan - alias: cbo_t2:cbo_t2 + alias: cbo_t2.cbo_t2 Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) @@ -700,7 +700,7 @@ STAGE PLANS: Statistics: Num rows: 18 Data size: 1513 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c_int (type: int) TableScan - alias: cbo_t3:cbo_t3 + alias: cbo_t3.cbo_t3 Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key 
(type: string) @@ -712,7 +712,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 20 Data size: 1615 Basic stats: COMPLETE Column stats: COMPLETE TableScan - alias: cbo_t4:cbo_t1 + alias: cbo_t4.cbo_t1 Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), c_int (type: int) diff --git a/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out b/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out index 9e0613f..3009208 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_lineage2.q.out @@ -552,7 +552,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_l1 -{"version":"1.0","engine":"mr","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(null-subquery1:j-subquery1:p1.key = null-subquery1:j-subquery1:t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(null-subquery2:j-subquery2:p2.key = null-subquery2:j-subquery2:t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(null-subquery1.j-subquery1.p1.key = null-subquery1.j-subquery1.t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(null-subquery2.j-subquery2.p2.key = null-subquery2.j-subquery2.t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} PREHOOK: query: drop table if exists emp PREHOOK: type: DROPTABLE PREHOOK: query: drop table if exists dept @@ -593,7 +593,7 @@ PREHOOK: Input: default@dept PREHOOK: Input: default@emp PREHOOK: Input: default@project PREHOOK: Output: default@tgt -{"version":"1.0","engine":"mr","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n 
SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} +{"version":"1.0","engine":"mr","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd.em.e.emp_id = emd.em.m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd.em.e.dept_id = emd.d.dept_id AND emd.em.e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not 
null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile diff --git a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 735e4f4..eae4ad4 100644 --- a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -1303,3 +1303,215 @@ POSTHOOK: Input: default@dest2 5 9 8 1 9 1 +PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 vectorized, llap + File Output Operator [FS_21] + Group By Operator [GBY_20] (rows=1 width=16) + Output:["$f0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_9] (rows=11 width=93) + Merge Join Operator [MERGEJOIN_18] (rows=11 width=93) + Conds:SEL_2.key=SEL_6.key(Inner) + <-Select Operator [SEL_6] (rows=10 width=93) + Output:["key"] + Filter Operator [FIL_17] (rows=10 width=93) + predicate:key is not null + TableScan [TS_4] (rows=10 width=93) + default@tbl2,subq1.b,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_2] (rows=10 width=93) + Output:["key"] + Filter Operator [FIL_16] (rows=10 width=93) + predicate:key is not null + TableScan [TS_0] (rows=10 width=93) + default@tbl1,subq1.a,Tbl:COMPLETE,Col:NONE,Output:["key"] + +PREHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. +explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. +explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 llap + File Output Operator [FS_30] + Select Operator [SEL_29] (rows=5 width=102) + Output:["key","cnt1","cnt11"] + Merge Join Operator [MERGEJOIN_49] (rows=5 width=102) + Conds:RS_51.key=RS_53.key(Inner),Output:["key","$f1","$f10"] + <-Reducer 2 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_51] + PartitionCols:key + Group By Operator [GBY_50] (rows=5 width=93) + Output:["key","$f1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col0 + Group By Operator [GBY_10] (rows=11 width=93) + Output:["_col0","_col1"],aggregations:["count()"],keys:key + Select Operator [SEL_9] (rows=11 width=93) + Output:["key"] + Merge Join Operator [MERGEJOIN_45] (rows=11 width=93) + Conds:SEL_2.key=SEL_6.key(Inner),Output:["key"] + <-Select Operator [SEL_6] (rows=10 width=93) + Output:["key"] + Filter Operator [FIL_41] (rows=10 width=93) + predicate:key is not null + TableScan [TS_4] (rows=10 width=93) + default@tbl2,src1.subq1.b,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_2] (rows=10 width=93) + Output:["key"] + Filter Operator [FIL_40] (rows=10 width=93) + predicate:key is not null + TableScan [TS_0] (rows=10 width=93) + default@tbl1,src1.subq1.a,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Reducer 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_53] + PartitionCols:key + Group By Operator [GBY_52] (rows=5 width=93) + Output:["key","$f1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] + PartitionCols:_col0 + Group By Operator [GBY_24] (rows=11 width=93) + Output:["_col0","_col1"],aggregations:["count()"],keys:key + Select Operator [SEL_23] (rows=11 width=93) + Output:["key"] + Merge Join Operator [MERGEJOIN_47] (rows=11 width=93) + Conds:SEL_16.key=SEL_20.key(Inner),Output:["key"] + <-Select Operator [SEL_20] (rows=10 width=93) + Output:["key"] + Filter Operator [FIL_44] (rows=10 width=93) + predicate:key is not null + TableScan [TS_18] (rows=10 width=93) + default@tbl2,src2.subq2.b,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_16] (rows=10 width=93) + Output:["key"] + Filter Operator [FIL_43] (rows=10 width=93) + predicate:key is not null + TableScan [TS_14] (rows=10 width=93) + default@tbl1,src2.subq2.a,Tbl:COMPLETE,Col:NONE,Output:["key"] + +PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +0 9 9 +2 1 1 +4 1 1 +5 9 9 +8 1 1 +9 1 1 diff --git a/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out index 9837b26..1f241cd 100644 --- a/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/vector_auto_smb_mapjoin_14.q.out @@ -1803,3 +1803,303 @@ POSTHOOK: Input: default@dest2 5 9 8 1 9 1 +PREHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed as part of sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: subq1.a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. 
+explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. +explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1.subq1.a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: key + Select Operator + expressions: key (type: int) + outputColumnNames: key + Group By Operator + aggregations: count() + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: key, $f1, $f10 + Select Operator + expressions: key (type: int), $f1 (type: bigint), $f10 (type: bigint) + outputColumnNames: key, cnt1, cnt11 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src2.subq2.a + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: key + Select Operator + expressions: key (type: int) + outputColumnNames: key + Group By Operator + aggregations: count() + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +0 9 9 +2 1 1 +4 1 1 +5 9 9 +8 1 1 +9 1 1
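
The substance of this change is the separator used when the CBO return path (hive.cbo.returnpath.hiveop) builds HiveTableScan aliases: the new getQualifiedAlias emits dot-separated qualified aliases (src1.subq1.a) where getAliasId emits colon-separated ids (src1:subq1:a), which is what drives every golden-file update above. The following is a minimal standalone Java sketch that restates the two helpers side by side for illustration only; AliasFormatDemo and its method names are hypothetical and do not exist in Hive, and the qbId parameter stands in for QB.getId().

// Standalone illustration of the two alias-formatting strategies in this patch.
// Not part of Hive: AliasFormatDemo, aliasId and qualifiedAlias are example names.
public class AliasFormatDemo {

    // Mirrors SemanticAnalyzer.getAliasId: nested-QB id joined to the alias with ':'.
    static String aliasId(String qbId, String alias) {
        return (qbId == null ? alias : qbId + ":" + alias).toLowerCase();
    }

    // Mirrors the new SemanticAnalyzer.getQualifiedAlias: any ':' separators already
    // present in the QB id are rewritten to '.', and the alias is appended with '.'.
    static String qualifiedAlias(String qbId, String alias) {
        return (qbId == null ? alias : qbId.replace(':', '.') + "." + alias).toLowerCase();
    }

    public static void main(String[] args) {
        // Top-level table scan: no enclosing QB id, so both forms are just the alias.
        System.out.println(aliasId(null, "A"));               // a
        System.out.println(qualifiedAlias(null, "A"));        // a

        // Table scan nested two sub-queries deep, as in the cbo_rp_auto_join1 plans.
        System.out.println(aliasId("src1:subq1", "a"));       // src1:subq1:a
        System.out.println(qualifiedAlias("src1:subq1", "a")); // src1.subq1.a
    }
}

Assuming the intent is as the golden files suggest, the dot form gives the return-path plans the same alias spelling that regular table references use, while getAliasId is left untouched for the existing non-CBO code paths.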