diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java index aff5520c7d..257375473a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java @@ -519,10 +519,11 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { Entry e = it.next(); for (Operator op : OperatorUtils.findOperators(e.getValue(), Operator.class)) { if (!visited.contains(op)) { - if (!findWorkOperators(optimizerCache, op).equals( - findWorkOperators(op, new HashSet>()))) { - throw new SemanticException("Error in shared work optimizer: operator cache contents" - + "and actual plan differ"); + Set> workCachedOps = findWorkOperators(optimizerCache, op); + Set> workPlanOps = findWorkOperators(op, new HashSet<>()); + if (!workCachedOps.equals(workPlanOps)) { + throw new SemanticException("Error in shared work optimizer: operator cache contents " + + "and actual plan differ\nIn cache: " + workCachedOps + "\nIn plan: " + workPlanOps); } visited.add(op); } @@ -799,7 +800,7 @@ private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pc } return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2, - currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps, false); + currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps); } private static SharedResult extractSharedOptimizationInfo(ParseContext pctx, @@ -810,7 +811,7 @@ private static SharedResult extractSharedOptimizationInfo(ParseContext pctx, Operator discardableOp) throws SemanticException { return extractSharedOptimizationInfo(pctx, optimizerCache, retainableOpEqualParent, discardableOpEqualParent, retainableOp, discardableOp, - new LinkedHashSet<>(), new LinkedHashSet<>(), new HashSet<>(), true); + new LinkedHashSet<>(), new LinkedHashSet<>(), new HashSet<>()); } private static SharedResult extractSharedOptimizationInfo(ParseContext pctx, @@ -821,8 +822,7 @@ private static SharedResult extractSharedOptimizationInfo(ParseContext pctx, Operator discardableOp, LinkedHashSet> retainableOps, LinkedHashSet> discardableOps, - Set> discardableInputOps, - boolean removeInputBranch) throws SemanticException { + Set> discardableInputOps) throws SemanticException { Operator equalOp1 = retainableOpEqualParent; Operator equalOp2 = discardableOpEqualParent; Operator currentOp1 = retainableOp; @@ -847,7 +847,7 @@ private static SharedResult extractSharedOptimizationInfo(ParseContext pctx, for (; idx < currentOp1.getParentOperators().size(); idx++) { Operator parentOp1 = currentOp1.getParentOperators().get(idx); Operator parentOp2 = currentOp2.getParentOperators().get(idx); - if (parentOp1 == equalOp1 && parentOp2 == equalOp2 && !removeInputBranch) { + if (parentOp1 == equalOp1 && parentOp2 == equalOp2) { continue; } if ((parentOp1 == equalOp1 && parentOp2 != equalOp2) || diff --git a/ql/src/test/queries/clientpositive/subquery_in_having.q b/ql/src/test/queries/clientpositive/subquery_in_having.q index 8b6d1a7773..732b3e59de 100644 --- a/ql/src/test/queries/clientpositive/subquery_in_having.q +++ b/ql/src/test/queries/clientpositive/subquery_in_having.q @@ -1,6 +1,5 @@ --! qt:dataset:src set hive.mapred.mode=nonstrict; -set hive.optimize.shared.work.extended=false; -- SORT_QUERY_RESULTS -- data setup @@ -156,4 +155,3 @@ having count(*) not in (select count(*) from src_null_n4 s1 where s1.key > '9' a DROP TABLE src_null_n4; DROP TABLE part_subq; -reset hive.optimize.shared.work.extended; diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out index af8e23a073..529d19d50b 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out @@ -1570,9 +1570,8 @@ POSTHOOK: Output: default@src_null_n4 POSTHOOK: Lineage: src_null_n4.key SCRIPT [] POSTHOOK: Lineage: src_null_n4.value EXPRESSION [] Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product -Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross product -Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross product +Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 6' is a cross product PREHOOK: query: explain select key, value, count(*) from src_null_n4 b @@ -1597,15 +1596,13 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Reducer 4 (BROADCAST_EDGE) - Map 6 <- Reducer 5 (BROADCAST_EDGE) - Reducer 10 <- Reducer 9 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) + Map 5 <- Reducer 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Map 3 (SIMPLE_EDGE) - Reducer 7 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) - Reducer 9 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (BROADCAST_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1658,17 +1655,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1690,14 +1676,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: b @@ -1714,7 +1695,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Reducer 5 + 1 Reducer 4 residual filter predicates: {(_col2 <> _col1)} Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1734,61 +1715,8 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Reducer 10 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col2 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Reducer 11 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint), _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1810,7 +1738,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col4, _col5 input vertices: - 1 Reducer 8 + 1 Reducer 7 Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -1820,7 +1748,7 @@ STAGE PLANS: 1 _col1 (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 input vertices: - 1 Reducer 11 + 1 Reducer 9 Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) @@ -1852,23 +1780,11 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: boolean) - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: boolean) - Reducer 7 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1912,7 +1828,13 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 8 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1937,50 +1859,54 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 9 - Execution mode: llap + Reducer 8 + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - input vertices: - 0 Map 1 - residual filter predicates: {(_col1 <> _col2)} + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 + Group By Operator + keys: _col1 (type: string), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col2 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + Reducer 9 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Stage: Stage-0 Fetch Operator @@ -1989,9 +1915,8 @@ STAGE PLANS: ListSink Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product -Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross product -Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross product +Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 6' is a cross product PREHOOK: query: select key, value, count(*) from src_null_n4 b where NOT EXISTS (select key from src_null_n4 where src_null_n4.value <> b.value)