diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index 4c99932759..8ec17cffa9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -320,8 +320,14 @@ public RelNode removeCorrelationViaRule(RelNode root) { return planner.findBestExp(); } + protected RexNode decorrelateExpr(RexNode exp, boolean valueGenerator) { + DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle(); + shuttle.setValueGenerator(valueGenerator); + return exp.accept(shuttle); + } protected RexNode decorrelateExpr(RexNode exp) { DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle(); + shuttle.setValueGenerator(false); return exp.accept(shuttle); } @@ -1107,7 +1113,11 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel) { try { findCorrelationEquivalent(correlation, ((Filter) rel).getCondition()); } catch (Util.FoundOne e) { - map.put(def, (Integer) e.getNode()); + // we need to keep predicate kind e.g. EQUAL or NOT EQUAL + // so that later while decorrelating LogicalCorrelate appropriate join predicate + // is generated + def.setPredicateKind((SqlKind)((Pair)e.getNode()).getValue()); + map.put(def, (Integer)((Pair) e.getNode()).getKey()); } } // If all correlation variables are now satisfied, skip creating a value @@ -1146,16 +1156,18 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel) { private void findCorrelationEquivalent(CorRef correlation, RexNode e) throws Util.FoundOne { switch (e.getKind()) { - case EQUALS: + // for now only EQUAL and NOT EQUAL corr predicates are optimized + case EQUALS: + case NOT_EQUALS: final RexCall call = (RexCall) e; final List operands = call.getOperands(); if (references(operands.get(0), correlation) && operands.get(1) instanceof RexInputRef) { - throw new Util.FoundOne(((RexInputRef) operands.get(1)).getIndex()); + throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(1)).getIndex(), e.getKind())); } if (references(operands.get(1), correlation) && operands.get(0) instanceof RexInputRef) { - throw new Util.FoundOne(((RexInputRef) operands.get(0)).getIndex()); + throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(0)).getIndex(), e.getKind())); } break; case AND: @@ -1224,17 +1236,22 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException { return null; } + Frame oldInputFrame = frame; // If this LogicalFilter has correlated reference, create value generator // and produce the correlated variables in the new output. if (cm.mapRefRelToCorRef.containsKey(rel)) { frame = decorrelateInputWithValueGenerator(rel); } - // Replace the filter expression to reference output of the join - // Map filter to the new filter over join - relBuilder.push(frame.r).filter( - simplifyComparison(decorrelateExpr(rel.getCondition()))); - + boolean valueGenerator = true; + if(frame.r == oldInputFrame.r) { + // this means correated value generator wasn't generated + valueGenerator = false; + } + // Replace the filter expression to reference output of the join + // Map filter to the new filter over join + relBuilder.push(frame.r).filter( + simplifyComparison(decorrelateExpr(rel.getCondition(), valueGenerator))); // Filter does not change the input ordering. // Filter rel does not permute the input. // All corvars produced by filter will have the same output positions in the @@ -1268,9 +1285,6 @@ private RexNode simplifyComparison(RexNode op) { case LESS_THAN_OR_EQUAL: // "x = x" simplifies to "x is not null" (similarly <= and >=) return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0); - default: - // "x != x" simplifies to "false" (similarly < and >) - return rexBuilder.makeLiteral(false); } } return op; @@ -1313,9 +1327,15 @@ public Frame decorrelateRel(LogicalFilter rel) { } + boolean valueGenerator = true; + if(frame.r == oldInput) { + // this means correated value generator wasn't generated + valueGenerator = false; + } + // Replace the filter expression to reference output of the join // Map filter to the new filter over join - relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition())); + relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition(), valueGenerator)); // Filter does not change the input ordering. @@ -1381,11 +1401,22 @@ public Frame decorrelateRel(LogicalCorrelate rel) { } final int newLeftPos = leftFrame.oldToNewOutputs.get(corDef.field); final int newRightPos = rightOutput.getValue(); - conditions.add( - rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, - RexInputRef.of(newLeftPos, newLeftOutput), - new RexInputRef(newLeftFieldCount + newRightPos, - newRightOutput.get(newRightPos).getType()))); + if(corDef.getPredicateKind() == SqlKind.NOT_EQUALS) { + conditions.add( + rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, + RexInputRef.of(newLeftPos, newLeftOutput), + new RexInputRef(newLeftFieldCount + newRightPos, + newRightOutput.get(newRightPos).getType()))); + + } + else { + conditions.add( + rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + RexInputRef.of(newLeftPos, newLeftOutput), + new RexInputRef(newLeftFieldCount + newRightPos, + newRightOutput.get(newRightPos).getType()))); + + } // remove this cor var from output position mapping corDefOutputs.remove(corDef); @@ -1820,7 +1851,79 @@ private static RelNode stripHep(RelNode rel) { /** Shuttle that decorrelates. */ private class DecorrelateRexShuttle extends RexShuttle { + private boolean valueGenerator; + public void setValueGenerator(boolean valueGenerator) { + this.valueGenerator = valueGenerator; + } + + // DecorrelateRexShuttle ends up decorrelating expressions cor.col1 <> $4 + // to $4=$4 if value generator is not generated, $4<>$4 is further simplified + // to false. This is wrong and messes up the whole tree. To prevent this visitCall + // is overridden to rewrite/simply such predicates to is not null. + // we also need to take care that we do this only for correlated predicates and + // not user specified explicit predicates + @Override public RexNode visitCall(final RexCall call) { + if(!valueGenerator) { + switch (call.getKind()) { + case EQUALS: + case NOT_EQUALS: + final List operands = new ArrayList<>(call.operands); + RexNode o0 = operands.get(0); + RexNode o1 = operands.get(1); + boolean isCorrelated = false; + if (o0 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o0) != null)) { + o0 = decorrFieldAccess((RexFieldAccess) o0); + isCorrelated = true; + + } + if (o1 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o1) != null)) { + o1 = decorrFieldAccess((RexFieldAccess) o1); + isCorrelated = true; + } + if (isCorrelated && RexUtil.eq(o0, o1)) { + return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0); + } + + final List newOperands = new ArrayList<>(); + newOperands.add(o0); + newOperands.add(o1); + boolean[] update = { false }; + List clonedOperands = visitList(newOperands, update); + + return relBuilder.call(call.getOperator(), clonedOperands); + } + } + + boolean[] update = {false}; + List clonedOperands = visitList(call.operands, update); + if (update[0]) { + // REVIEW jvs 8-Mar-2005: This doesn't take into account + // the fact that a rewrite may have changed the result type. + // To do that, we would need to take a RexBuilder and + // watch out for special operators like CAST and NEW where + // the type is embedded in the original call. + return relBuilder.call( + call.getOperator(), + clonedOperands); + } + else { + return call; + } + } + @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) { + return decorrFieldAccess(fieldAccess); + } + + @Override public RexNode visitInputRef(RexInputRef inputRef) { + final RexInputRef ref = getNewForOldInputRef(inputRef); + if (ref.getIndex() == inputRef.getIndex() + && ref.getType() == inputRef.getType()) { + return inputRef; // re-use old object, to prevent needless expr cloning + } + return ref; + } + private RexNode decorrFieldAccess(RexFieldAccess fieldAccess) { int newInputOutputOffset = 0; for (RelNode input : currentRel.getInputs()) { final Frame frame = map.get(input); @@ -1835,7 +1938,7 @@ private static RelNode stripHep(RelNode rel) { // This input rel does produce the cor var referenced. // Assume fieldAccess has the correct type info. return new RexInputRef(newInputPos + newInputOutputOffset, - frame.r.getRowType().getFieldList().get(newInputPos) + frame.r.getRowType().getFieldList().get(newInputPos) .getType()); } } @@ -1849,15 +1952,6 @@ private static RelNode stripHep(RelNode rel) { } return fieldAccess; } - - @Override public RexNode visitInputRef(RexInputRef inputRef) { - final RexInputRef ref = getNewForOldInputRef(inputRef); - if (ref.getIndex() == inputRef.getIndex() - && ref.getType() == inputRef.getType()) { - return inputRef; // re-use old object, to prevent needless expr cloning - } - return ref; - } } /** Shuttle that removes correlations. */ @@ -2882,10 +2976,12 @@ public CorDef def() { static class CorDef implements Comparable { public final CorrelationId corr; public final int field; + private SqlKind predicateKind; CorDef(CorrelationId corr, int field) { this.corr = corr; this.field = field; + this.predicateKind = null; } @Override public String toString() { @@ -2910,6 +3006,13 @@ public int compareTo(@Nonnull CorDef o) { } return Integer.compare(field, o.field); } + public SqlKind getPredicateKind() { + return predicateKind; + } + public void setPredicateKind(SqlKind predKind) { + this.predicateKind = predKind; + + } } /** A map of the locations of diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q index 4ba170a706..09481966c7 100644 --- a/ql/src/test/queries/clientpositive/subquery_in.q +++ b/ql/src/test/queries/clientpositive/subquery_in.q @@ -69,30 +69,70 @@ select p_mfgr, p_name, p_size from part b where b.p_size in (select min(p_size) from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a - where r <= 2 and b.p_mfgr = a.p_mfgr + where r <= 2 and b.p_name = a.p_mfgr + ) +; + +-- agg, non-equi corr +explain +select p_mfgr, p_name, p_size +from part b where b.p_size in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_name <> a.p_mfgr + ) +; + +select p_mfgr, p_name, p_size +from part b where b.p_size in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_name <> a.p_mfgr ) ; -- distinct, corr -explain -select * -from src b +explain +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) ; -select * -from src b +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) ; +-- corr, non equi predicate, should not have a join with outer to generate +-- corr values +explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +; + +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +; + + -- non agg, non corr, windowing select p_mfgr, p_name, p_size from part diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index 3004e36c9d..0749872253 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -50,22 +50,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = value) and (key = key) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 1f9c9e4474..9105d08b55 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -164,22 +164,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -643,22 +643,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((value = value) and (key > '9')) (type: boolean) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1967,18 +1967,18 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_size = p_size) and (p_partkey = p_partkey)) (type: boolean) - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_size is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_partkey (type: int), p_name (type: string), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2006,16 +2006,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index 29516eff82..1eba791d91 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -2269,18 +2269,18 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_type = p_type) and (p_container = p_container)) (type: boolean) - Statistics: Num rows: 6 Data size: 1902 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_type is not null and p_container is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_name (type: string), p_container (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -2406,12 +2406,12 @@ STAGE PLANS: 0 _col4 (type: string), _col1 (type: string), _col6 (type: string) 1 _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4533 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: string) sort order: + Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4533 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -2423,12 +2423,12 @@ STAGE PLANS: 0 _col4 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14 - Statistics: Num rows: 5 Data size: 3939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 4986 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: string), _col3 (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: string), _col3 (type: string) - Statistics: Num rows: 5 Data size: 3939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 4986 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col13 (type: bigint), _col14 (type: bigint) Reducer 4 Execution mode: llap @@ -2440,17 +2440,17 @@ STAGE PLANS: 0 _col4 (type: string), _col3 (type: string) 1 _col1 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col17 - Statistics: Num rows: 5 Data size: 4332 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 5484 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not CASE WHEN ((_col13 = 0)) THEN (false) WHEN (_col13 is null) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col14 < _col13)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 3 Data size: 2599 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2742 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 2599 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2742 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 2599 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2742 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2462,16 +2462,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col2 (type: string) - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/query16.q.out b/ql/src/test/results/clientpositive/perf/query16.q.out index a7f93f9ec2..662bc97bd0 100644 --- a/ql/src/test/results/clientpositive/perf/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/query16.q.out @@ -1,4 +1,3 @@ -Warning: Shuffle Join MERGEJOIN[113][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Stage 'Reducer 18' is a cross product PREHOOK: query: explain select count(distinct cs_order_number) as `order count` @@ -62,182 +61,132 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 14 <- Map 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE), Map 20 (CUSTOM_SIMPLE_EDGE), Map 21 (CUSTOM_SIMPLE_EDGE), Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 - File Output Operator [FS_74] - Limit [LIM_72] (rows=1 width=344) + Reducer 8 + File Output Operator [FS_50] + Limit [LIM_48] (rows=1 width=344) Number of rows:100 - Select Operator [SEL_71] (rows=1 width=344) + Select Operator [SEL_47] (rows=1 width=344) Output:["_col0","_col1","_col2"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_70] - Select Operator [SEL_69] (rows=1 width=344) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_46] + Select Operator [SEL_45] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_112] (rows=1 width=344) + Group By Operator [GBY_78] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=344) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_77] + Group By Operator [GBY_76] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_109] (rows=1395035081047425024 width=1) + Group By Operator [GBY_75] (rows=421645953 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_108] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_74] PartitionCols:_col0 - Group By Operator [GBY_107] (rows=1395035081047425024 width=1) + Group By Operator [GBY_73] (rows=421645953 width=135) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_65] (rows=1395035081047425024 width=1) + Select Operator [SEL_41] (rows=421645953 width=135) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_64] (rows=1395035081047425024 width=1) + Filter Operator [FIL_40] (rows=421645953 width=135) predicate:_col16 is null - Select Operator [SEL_63] (rows=2790070162094850048 width=1) + Select Operator [SEL_39] (rows=843291907 width=135) Output:["_col4","_col5","_col6","_col16"] - Merge Join Operator [MERGEJOIN_119] (rows=2790070162094850048 width=1) - Conds:RS_60._col3, _col4=RS_61._col0, _col1(Inner),Output:["_col4","_col5","_col6","_col14"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0, _col1 - Group By Operator [GBY_46] (rows=2536427365110644736 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col0, _col1 - Group By Operator [GBY_44] (rows=5072854730221289472 width=1) - Output:["_col0","_col1"],keys:_col2, _col3 - Select Operator [SEL_43] (rows=5072854730221289472 width=1) - Output:["_col2","_col3"] - Filter Operator [FIL_42] (rows=5072854730221289472 width=1) - predicate:(_col2 <> _col0) - Merge Join Operator [MERGEJOIN_117] (rows=5072854730221289472 width=1) - Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"] - <-Map 15 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_39] - PartitionCols:_col1 - Select Operator [SEL_20] (rows=287989836 width=135) - Output:["_col0","_col1"] - TableScan [TS_19] (rows=287989836 width=135) - default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Select Operator [SEL_38] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=9223372036854775807 width=1) - Output:["_col0","_col1"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_113] (rows=9223372036854775807 width=1) - Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_32] - Select Operator [SEL_28] (rows=287989836 width=135) - Output:["_col0","_col1"] - Please refer to the previous TableScan [TS_19] - <-Map 20 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] - Select Operator [SEL_22] (rows=73049 width=4) - TableScan [TS_21] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE - <-Map 21 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_30] - Select Operator [SEL_24] (rows=60 width=4) - TableScan [TS_23] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE - <-Map 22 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_31] - Select Operator [SEL_26] (rows=40000000 width=4) - TableScan [TS_25] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col3, _col4 - Merge Join Operator [MERGEJOIN_118] (rows=421645953 width=135) - Conds:RS_57._col4=RS_58._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_58] + Merge Join Operator [MERGEJOIN_82] (rows=843291907 width=135) + Conds:RS_35._col4=RS_36._col0(Left Outer),RS_35._col4=RS_37._col1(Inner),Output:["_col3","_col4","_col5","_col6","_col14","_col15"],residual filter predicates:{(_col3 <> _col15)} + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Select Operator [SEL_18] (rows=14399440 width=106) + Output:["_col0","_col1"] + Group By Operator [GBY_17] (rows=14399440 width=106) + Output:["_col0"],keys:KEY._col0 + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=28798881 width=106) + Output:["_col0"],keys:cr_order_number + Filter Operator [FIL_71] (rows=28798881 width=106) + predicate:cr_order_number is not null + TableScan [TS_12] (rows=28798881 width=106) + default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col1 + Select Operator [SEL_25] (rows=143994918 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_24] (rows=143994918 width=135) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=287989836 width=135) + Output:["_col0","_col1"],keys:cs_order_number, cs_warehouse_sk + Filter Operator [FIL_72] (rows=287989836 width=135) + predicate:(cs_order_number is not null and cs_warehouse_sk is not null) + TableScan [TS_19] (rows=287989836 width=135) + default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_81] (rows=383314495 width=135) + Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_33] PartitionCols:_col0 - Select Operator [SEL_18] (rows=14399440 width=106) - Output:["_col0","_col1"] - Group By Operator [GBY_17] (rows=14399440 width=106) - Output:["_col0"],keys:KEY._col0 - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=28798881 width=106) - Output:["_col0"],keys:cr_order_number - Filter Operator [FIL_104] (rows=28798881 width=106) - predicate:cr_order_number is not null - TableScan [TS_12] (rows=28798881 width=106) - default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_116] (rows=383314495 width=135) - Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_55] + Select Operator [SEL_11] (rows=30 width=2045) + Output:["_col0"] + Filter Operator [FIL_70] (rows=30 width=2045) + predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) + TableScan [TS_9] (rows=60 width=2045) + default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_80] (rows=348467716 width=135) + Conds:RS_29._col1=RS_30._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0 - Select Operator [SEL_11] (rows=30 width=2045) + Select Operator [SEL_8] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_103] (rows=30 width=2045) - predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) - TableScan [TS_9] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_115] (rows=348467716 width=135) - Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_52] + Filter Operator [FIL_69] (rows=20000000 width=1014) + predicate:((ca_state = 'NY') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_79] (rows=316788826 width=135) + Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col0 - Select Operator [SEL_8] (rows=20000000 width=1014) + Select Operator [SEL_2] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_67] (rows=287989836 width=135) + predicate:(cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_102] (rows=20000000 width=1014) - predicate:((ca_state = 'NY') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_114] (rows=316788826 width=135) - Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_48] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_100] (rows=287989836 width=135) - predicate:(cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_101] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + Filter Operator [FIL_68] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]