diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 428ba38..58ee605 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -8493,12 +8493,15 @@ private void mergeJoinTree(QB qb) { if (tree.getJoinSrc() == null) { return; } + // make array with QBJoinTree : outer most(0) --> inner most(n) List trees = new ArrayList(); for (;tree != null; tree = tree.getJoinSrc()) { trees.add(tree); } + // merging from 'target'(inner) to 'node'(outer) + boolean mergedQBJTree = false; for (int i = trees.size() - 1; i >= 0; i--) { QBJoinTree target = trees.get(i); if (target == null) { @@ -8528,6 +8531,7 @@ private void mergeJoinTree(QB qb) { } mergeJoins(qb, node, target, pos, mergeDetails.getSecond()); trees.set(j, null); + mergedQBJTree = true; continue; // continue merging with next alias } /* @@ -8540,6 +8544,27 @@ private void mergeJoinTree(QB qb) { } } } + + // Now that we reordered QBJoinTrees, update leftaliases of all + // QBJoinTree from innermost to outer + if ((trees.size() > 1) && mergedQBJTree) { + QBJoinTree curQBJTree = null; + QBJoinTree prevQBJTree = null; + for (int i = trees.size() - 1; i >= 0; i--) { + curQBJTree = trees.get(i); + if (curQBJTree != null) { + if (prevQBJTree != null) { + ArrayList newCurLeftAliases = new ArrayList(); + newCurLeftAliases.addAll(Arrays.asList(prevQBJTree.getLeftAliases())); + newCurLeftAliases.addAll(Arrays.asList(prevQBJTree.getRightAliases())); + curQBJTree + .setLeftAliases(newCurLeftAliases.toArray(new String[newCurLeftAliases.size()])); + } + prevQBJTree = curQBJTree; + } + } + } + // reconstruct join tree QBJoinTree current = null; for (int i = 0; i < trees.size(); i++) { diff --git ql/src/test/queries/clientpositive/join42.q ql/src/test/queries/clientpositive/join42.q new file mode 100644 index 0000000..f379058 --- /dev/null +++ ql/src/test/queries/clientpositive/join42.q @@ -0,0 +1,36 @@ +create table L as select 4436 id; +create table LA as select 4436 loan_id, 4748 aid, 4415 pi_id; +create table FR as select 4436 loan_id; +create table A as select 4748 id; +create table PI as select 4415 id; + +create table acct as select 4748 aid, 10 acc_n, 122 brn; +insert into table acct values(4748, null, null); +insert into table acct values(4748, null, null); + +--[HIVE-10841] (WHERE col is not null) does not work sometimes for queries with many JOIN statements +explain select + acct.ACC_N, + acct.brn +FROM L +JOIN LA ON L.id = LA.loan_id +JOIN FR ON L.id = FR.loan_id +JOIN A ON LA.aid = A.id +JOIN PI ON PI.id = LA.pi_id +JOIN acct ON A.id = acct.aid +WHERE + L.id = 4436 + and acct.brn is not null; + +select + acct.ACC_N, + acct.brn +FROM L +JOIN LA ON L.id = LA.loan_id +JOIN FR ON L.id = FR.loan_id +JOIN A ON LA.aid = A.id +JOIN PI ON PI.id = LA.pi_id +JOIN acct ON A.id = acct.aid +WHERE + L.id = 4436 + and acct.brn is not null; diff --git ql/src/test/results/clientpositive/join42.q.out ql/src/test/results/clientpositive/join42.q.out new file mode 100644 index 0000000..9fdf673 --- /dev/null +++ ql/src/test/results/clientpositive/join42.q.out @@ -0,0 +1,312 @@ +PREHOOK: query: create table L as select 4436 id +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@L +POSTHOOK: query: create table L as select 4436 id +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@L +PREHOOK: query: create table LA as select 4436 loan_id, 4748 aid, 4415 pi_id +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@LA +POSTHOOK: query: create table LA as select 4436 loan_id, 4748 aid, 4415 pi_id +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@LA +PREHOOK: query: create table FR as select 4436 loan_id +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@FR +POSTHOOK: query: create table FR as select 4436 loan_id +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@FR +PREHOOK: query: create table A as select 4748 id +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@A +POSTHOOK: query: create table A as select 4748 id +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@A +PREHOOK: query: create table PI as select 4415 id +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@PI +POSTHOOK: query: create table PI as select 4415 id +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@PI +PREHOOK: query: create table acct as select 4748 aid, 10 acc_n, 122 brn +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@acct +POSTHOOK: query: create table acct as select 4748 aid, 10 acc_n, 122 brn +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acct +PREHOOK: query: insert into table acct values(4748, null, null) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@acct +POSTHOOK: query: insert into table acct values(4748, null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@acct +POSTHOOK: Lineage: acct.acc_n EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: acct.aid EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acct.brn EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into table acct values(4748, null, null) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@acct +POSTHOOK: query: insert into table acct values(4748, null, null) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@acct +POSTHOOK: Lineage: acct.acc_n EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: acct.aid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acct.brn EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: --[HIVE-10841] (WHERE col is not null) does not work sometimes for queries with many JOIN statements +explain select + acct.ACC_N, + acct.brn +FROM L +JOIN LA ON L.id = LA.loan_id +JOIN FR ON L.id = FR.loan_id +JOIN A ON LA.aid = A.id +JOIN PI ON PI.id = LA.pi_id +JOIN acct ON A.id = acct.aid +WHERE + L.id = 4436 + and acct.brn is not null +PREHOOK: type: QUERY +POSTHOOK: query: --[HIVE-10841] (WHERE col is not null) does not work sometimes for queries with many JOIN statements +explain select + acct.ACC_N, + acct.brn +FROM L +JOIN LA ON L.id = LA.loan_id +JOIN FR ON L.id = FR.loan_id +JOIN A ON LA.aid = A.id +JOIN PI ON PI.id = LA.pi_id +JOIN acct ON A.id = acct.aid +WHERE + L.id = 4436 + and acct.brn is not null +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: l + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id = 4436) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 4436 (type: int) + sort order: + + Map-reduce partition columns: 4436 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: la + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((loan_id is not null and aid is not null) and pi_id is not null) and (loan_id = 4436)) (type: boolean) + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 4436 (type: int) + sort order: + + Map-reduce partition columns: 4436 (type: int) + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + value expressions: aid (type: int), pi_id (type: int) + TableScan + alias: fr + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (loan_id = 4436) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 4436 (type: int) + sort order: + + Map-reduce partition columns: 4436 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 id (type: int) + 1 loan_id (type: int) + 2 loan_id (type: int) + outputColumnNames: _col5, _col6 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: int) + TableScan + alias: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: acct + Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (aid is not null and brn is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: aid (type: int) + sort order: + + Map-reduce partition columns: aid (type: int) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: acc_n (type: int), brn (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col5 (type: int) + 1 id (type: int) + 2 aid (type: int) + outputColumnNames: _col6, _col19, _col20 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col6 (type: int) + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col19 (type: int), _col20 (type: int) + TableScan + alias: pi + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col6 (type: int) + 1 id (type: int) + outputColumnNames: _col19, _col20 + Statistics: Num rows: 4 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col19 (type: int), _col20 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 18 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 18 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + acct.ACC_N, + acct.brn +FROM L +JOIN LA ON L.id = LA.loan_id +JOIN FR ON L.id = FR.loan_id +JOIN A ON LA.aid = A.id +JOIN PI ON PI.id = LA.pi_id +JOIN acct ON A.id = acct.aid +WHERE + L.id = 4436 + and acct.brn is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@a +PREHOOK: Input: default@acct +PREHOOK: Input: default@fr +PREHOOK: Input: default@l +PREHOOK: Input: default@la +PREHOOK: Input: default@pi +#### A masked pattern was here #### +POSTHOOK: query: select + acct.ACC_N, + acct.brn +FROM L +JOIN LA ON L.id = LA.loan_id +JOIN FR ON L.id = FR.loan_id +JOIN A ON LA.aid = A.id +JOIN PI ON PI.id = LA.pi_id +JOIN acct ON A.id = acct.aid +WHERE + L.id = 4436 + and acct.brn is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@a +POSTHOOK: Input: default@acct +POSTHOOK: Input: default@fr +POSTHOOK: Input: default@l +POSTHOOK: Input: default@la +POSTHOOK: Input: default@pi +#### A masked pattern was here #### +10 122